IPALLOCATION : If the node is held pinned down in "init" state
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* timeval produced by timeval_current_ofs() from the takeover_timeout
   tunable; expands to a use of a variable named 'ctdb', which must be
   in scope wherever the macro is used */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* first argument given to timeval_current_ofs() when the next
   gratuitous arp round is scheduled (presumably seconds) */
#define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp stops rescheduling itself once this many
   rounds have been sent */
#define CTDB_ARP_REPEAT   3
35
/*
  one local network interface, kept on the ctdb->ifaces list
 */
struct ctdb_iface {
        /* linkage for the ctdb->ifaces list */
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name, used as the lookup key */
        const char *name;
        /* false when the record is created by ctdb_add_local_iface */
        bool link_up;
        /* number of vnns currently assigned to this interface */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state carried from ctdb_do_takeip to ctdb_do_takeip_callback
 */
struct ctdb_do_takeip_state {
        /* the control request that gets the reply */
        struct ctdb_req_control *c;
        /* the vnn being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334         TDB_DATA data;
335
336         if (status != 0) {
337                 if (status == -ETIME) {
338                         ctdb_ban_self(ctdb);
339                 }
340                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341                                  ctdb_addr_to_str(&state->vnn->public_address),
342                                  ctdb_vnn_iface_string(state->vnn)));
343                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
344                 talloc_free(state);
345                 return;
346         }
347
348         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
349         if (ret != 0) {
350                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
351                 talloc_free(state);
352                 return;
353         }
354
355         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356         data.dsize = strlen((char *)data.dptr) + 1;
357         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
358
359         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
360
361
362         /* the control succeeded */
363         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
364         talloc_free(state);
365         return;
366 }
367
368 /*
369   take over an ip address
370  */
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372                               struct ctdb_req_control *c,
373                               struct ctdb_vnn *vnn)
374 {
375         int ret;
376         struct ctdb_do_takeip_state *state;
377
378         ret = ctdb_vnn_assign_iface(ctdb, vnn);
379         if (ret != 0) {
380                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381                                  "assin a usable interface\n",
382                                  ctdb_addr_to_str(&vnn->public_address),
383                                  vnn->public_netmask_bits));
384                 return -1;
385         }
386
387         state = talloc(vnn, struct ctdb_do_takeip_state);
388         CTDB_NO_MEMORY(ctdb, state);
389
390         state->c = talloc_steal(ctdb, c);
391         state->vnn   = vnn;
392
393         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394                             ctdb_addr_to_str(&vnn->public_address),
395                             vnn->public_netmask_bits,
396                             ctdb_vnn_iface_string(vnn)));
397
398         ret = ctdb_event_script_callback(ctdb,
399                                          state,
400                                          ctdb_do_takeip_callback,
401                                          state,
402                                          false,
403                                          CTDB_EVENT_TAKE_IP,
404                                          "%s %s %u",
405                                          ctdb_vnn_iface_string(vnn),
406                                          ctdb_addr_to_str(&vnn->public_address),
407                                          vnn->public_netmask_bits);
408
409         if (ret != 0) {
410                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411                         ctdb_addr_to_str(&vnn->public_address),
412                         ctdb_vnn_iface_string(vnn)));
413                 talloc_free(state);
414                 return -1;
415         }
416
417         return 0;
418 }
419
/*
  state carried from ctdb_do_updateip to ctdb_do_updateip_callback
 */
struct ctdb_do_updateip_state {
        /* the control request that gets the reply */
        struct ctdb_req_control *c;
        /* interface the address was on before the move */
        struct ctdb_iface *old;
        /* the vnn being moved */
        struct ctdb_vnn *vnn;
};
425
426 /*
427   called when updateip event finishes
428  */
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
430                                       void *private_data)
431 {
432         struct ctdb_do_updateip_state *state =
433                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
434         int32_t ret;
435
436         if (status != 0) {
437                 if (status == -ETIME) {
438                         ctdb_ban_self(ctdb);
439                 }
440                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441                         ctdb_addr_to_str(&state->vnn->public_address),
442                         state->old->name,
443                         ctdb_vnn_iface_string(state->vnn)));
444
445                 /*
446                  * All we can do is reset the old interface
447                  * and let the next run fix it
448                  */
449                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450                 state->vnn->iface = state->old;
451                 state->vnn->iface->references++;
452
453                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
454                 talloc_free(state);
455                 return;
456         }
457
458         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
459         if (ret != 0) {
460                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
461                 talloc_free(state);
462                 return;
463         }
464
465         /* the control succeeded */
466         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
467         talloc_free(state);
468         return;
469 }
470
471 /*
472   update (move) an ip address
473  */
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475                                 struct ctdb_req_control *c,
476                                 struct ctdb_vnn *vnn)
477 {
478         int ret;
479         struct ctdb_do_updateip_state *state;
480         struct ctdb_iface *old = vnn->iface;
481
482         ctdb_vnn_unassign_iface(ctdb, vnn);
483         ret = ctdb_vnn_assign_iface(ctdb, vnn);
484         if (ret != 0) {
485                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486                                  "assin a usable interface (old iface '%s')\n",
487                                  ctdb_addr_to_str(&vnn->public_address),
488                                  vnn->public_netmask_bits,
489                                  old->name));
490                 return -1;
491         }
492
493         state = talloc(vnn, struct ctdb_do_updateip_state);
494         CTDB_NO_MEMORY(ctdb, state);
495
496         state->c = talloc_steal(ctdb, c);
497         state->old = old;
498         state->vnn = vnn;
499
500         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
501                             "interface %s to %s\n",
502                             ctdb_addr_to_str(&vnn->public_address),
503                             vnn->public_netmask_bits,
504                             old->name,
505                             ctdb_vnn_iface_string(vnn)));
506
507         ret = ctdb_event_script_callback(ctdb,
508                                          state,
509                                          ctdb_do_updateip_callback,
510                                          state,
511                                          false,
512                                          CTDB_EVENT_UPDATE_IP,
513                                          "%s %s %s %u",
514                                          state->old->name,
515                                          ctdb_vnn_iface_string(vnn),
516                                          ctdb_addr_to_str(&vnn->public_address),
517                                          vnn->public_netmask_bits);
518         if (ret != 0) {
519                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
520                                  ctdb_addr_to_str(&vnn->public_address),
521                                  old->name, ctdb_vnn_iface_string(vnn)));
522                 talloc_free(state);
523                 return -1;
524         }
525
526         return 0;
527 }
528
529 /*
530   Find the vnn of the node that has a public ip address
531   returns -1 if the address is not known as a public address
532  */
533 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
534 {
535         struct ctdb_vnn *vnn;
536
537         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
538                 if (ctdb_same_ip(&vnn->public_address, addr)) {
539                         return vnn;
540                 }
541         }
542
543         return NULL;
544 }
545
546 /*
547   take over an ip address
548  */
549 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
550                                  struct ctdb_req_control *c,
551                                  TDB_DATA indata,
552                                  bool *async_reply)
553 {
554         int ret;
555         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
556         struct ctdb_vnn *vnn;
557         bool have_ip = false;
558         bool do_updateip = false;
559         bool do_takeip = false;
560         struct ctdb_iface *best_iface = NULL;
561
562         if (pip->pnn != ctdb->pnn) {
563                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
564                                  "with pnn %d, but we're node %d\n",
565                                  ctdb_addr_to_str(&pip->addr),
566                                  pip->pnn, ctdb->pnn));
567                 return -1;
568         }
569
570         /* update out vnn list */
571         vnn = find_public_ip_vnn(ctdb, &pip->addr);
572         if (vnn == NULL) {
573                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
574                         ctdb_addr_to_str(&pip->addr)));
575                 return 0;
576         }
577
578         have_ip = ctdb_sys_have_ip(&pip->addr);
579         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
580         if (best_iface == NULL) {
581                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
582                                  "a usable interface (old %s, have_ip %d)\n",
583                                  ctdb_addr_to_str(&vnn->public_address),
584                                  vnn->public_netmask_bits,
585                                  ctdb_vnn_iface_string(vnn),
586                                  have_ip));
587                 return -1;
588         }
589
590         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
591                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
592                 have_ip = false;
593         }
594
595         if (vnn->iface == NULL && have_ip) {
596                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
597                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
598                                  ctdb_addr_to_str(&vnn->public_address)));
599                 return -1;
600         }
601
602         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
603                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
604                                   "and we have it on iface[%s], but it was assigned to node %d"
605                                   "and we are node %d, banning ourself\n",
606                                  ctdb_addr_to_str(&vnn->public_address),
607                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
608                 ctdb_ban_self(ctdb);
609                 return -1;
610         }
611
612         if (vnn->pnn == -1 && have_ip) {
613                 vnn->pnn = ctdb->pnn;
614                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
615                                   "and we already have it on iface[%s], update local daemon\n",
616                                  ctdb_addr_to_str(&vnn->public_address),
617                                   ctdb_vnn_iface_string(vnn)));
618                 return 0;
619         }
620
621         if (vnn->iface) {
622                 if (vnn->iface->link_up) {
623                         /* only move when the rebalance gains something */
624                         if (vnn->iface->references > (best_iface->references + 1)) {
625                                 do_updateip = true;
626                         }
627                 } else if (vnn->iface != best_iface) {
628                         do_updateip = true;
629                 }
630         }
631
632         if (!have_ip) {
633                 if (do_updateip) {
634                         ctdb_vnn_unassign_iface(ctdb, vnn);
635                         do_updateip = false;
636                 }
637                 do_takeip = true;
638         }
639
640         if (do_takeip) {
641                 ret = ctdb_do_takeip(ctdb, c, vnn);
642                 if (ret != 0) {
643                         return -1;
644                 }
645         } else if (do_updateip) {
646                 ret = ctdb_do_updateip(ctdb, c, vnn);
647                 if (ret != 0) {
648                         return -1;
649                 }
650         } else {
651                 /*
652                  * The interface is up and the kernel known the ip
653                  * => do nothing
654                  */
655                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
656                         ctdb_addr_to_str(&pip->addr),
657                         vnn->public_netmask_bits,
658                         ctdb_vnn_iface_string(vnn)));
659                 return 0;
660         }
661
662         /* tell ctdb_control.c that we will be replying asynchronously */
663         *async_reply = true;
664
665         return 0;
666 }
667
668 /*
669   takeover an ip address old v4 style
670  */
671 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
672                                 struct ctdb_req_control *c,
673                                 TDB_DATA indata, 
674                                 bool *async_reply)
675 {
676         TDB_DATA data;
677         
678         data.dsize = sizeof(struct ctdb_public_ip);
679         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
680         CTDB_NO_MEMORY(ctdb, data.dptr);
681         
682         memcpy(data.dptr, indata.dptr, indata.dsize);
683         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
684 }
685
686 /*
687   kill any clients that are registered with a IP that is being released
688  */
689 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
690 {
691         struct ctdb_client_ip *ip;
692
693         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
694                 ctdb_addr_to_str(addr)));
695
696         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
697                 ctdb_sock_addr tmp_addr;
698
699                 tmp_addr = ip->addr;
700                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
701                         ip->client_id,
702                         ctdb_addr_to_str(&ip->addr)));
703
704                 if (ctdb_same_ip(&tmp_addr, addr)) {
705                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
706                                                                      ip->client_id, 
707                                                                      struct ctdb_client);
708                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
709                                 ip->client_id,
710                                 ctdb_addr_to_str(&ip->addr),
711                                 client->pid));
712
713                         if (client->pid != 0) {
714                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
715                                         (unsigned)client->pid,
716                                         ctdb_addr_to_str(addr),
717                                         ip->client_id));
718                                 kill(client->pid, SIGKILL);
719                         }
720                 }
721         }
722 }
723
724 /*
725   called when releaseip event finishes
726  */
727 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
728                                 void *private_data)
729 {
730         struct takeover_callback_state *state = 
731                 talloc_get_type(private_data, struct takeover_callback_state);
732         TDB_DATA data;
733
734         if (status == -ETIME) {
735                 ctdb_ban_self(ctdb);
736         }
737
738         /* send a message to all clients of this node telling them
739            that the cluster has been reconfigured and they should
740            release any sockets on this IP */
741         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
742         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
743         data.dsize = strlen((char *)data.dptr)+1;
744
745         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
746
747         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
748
749         /* kill clients that have registered with this IP */
750         release_kill_clients(ctdb, state->addr);
751
752         ctdb_vnn_unassign_iface(ctdb, state->vnn);
753
754         /* the control succeeded */
755         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
756         talloc_free(state);
757 }
758
759 /*
760   release an ip address
761  */
762 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
763                                 struct ctdb_req_control *c,
764                                 TDB_DATA indata, 
765                                 bool *async_reply)
766 {
767         int ret;
768         struct takeover_callback_state *state;
769         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
770         struct ctdb_vnn *vnn;
771
772         /* update our vnn list */
773         vnn = find_public_ip_vnn(ctdb, &pip->addr);
774         if (vnn == NULL) {
775                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
776                         ctdb_addr_to_str(&pip->addr)));
777                 return 0;
778         }
779         vnn->pnn = pip->pnn;
780
781         /* stop any previous arps */
782         talloc_free(vnn->takeover_ctx);
783         vnn->takeover_ctx = NULL;
784
785         if (!ctdb_sys_have_ip(&pip->addr)) {
786                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
787                         ctdb_addr_to_str(&pip->addr),
788                         vnn->public_netmask_bits, 
789                         ctdb_vnn_iface_string(vnn)));
790                 ctdb_vnn_unassign_iface(ctdb, vnn);
791                 return 0;
792         }
793
794         if (vnn->iface == NULL) {
795                 DEBUG(DEBUG_ERR,(__location__ " release_ip of IP %s is known to the kernel, "
796                                  "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
797                                  ctdb_addr_to_str(&vnn->public_address)));
798                 return 0;
799         }
800
801         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
802                 ctdb_addr_to_str(&pip->addr),
803                 vnn->public_netmask_bits, 
804                 ctdb_vnn_iface_string(vnn),
805                 pip->pnn));
806
807         state = talloc(ctdb, struct takeover_callback_state);
808         CTDB_NO_MEMORY(ctdb, state);
809
810         state->c = talloc_steal(state, c);
811         state->addr = talloc(state, ctdb_sock_addr);       
812         CTDB_NO_MEMORY(ctdb, state->addr);
813         *state->addr = pip->addr;
814         state->vnn   = vnn;
815
816         ret = ctdb_event_script_callback(ctdb, 
817                                          state, release_ip_callback, state,
818                                          false,
819                                          CTDB_EVENT_RELEASE_IP,
820                                          "%s %s %u",
821                                          ctdb_vnn_iface_string(vnn),
822                                          ctdb_addr_to_str(&pip->addr),
823                                          vnn->public_netmask_bits);
824         if (ret != 0) {
825                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
826                         ctdb_addr_to_str(&pip->addr),
827                         ctdb_vnn_iface_string(vnn)));
828                 talloc_free(state);
829                 return -1;
830         }
831
832         /* tell the control that we will be reply asynchronously */
833         *async_reply = true;
834         return 0;
835 }
836
837 /*
838   release an ip address old v4 style
839  */
840 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
841                                 struct ctdb_req_control *c,
842                                 TDB_DATA indata, 
843                                 bool *async_reply)
844 {
845         TDB_DATA data;
846         
847         data.dsize = sizeof(struct ctdb_public_ip);
848         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
849         CTDB_NO_MEMORY(ctdb, data.dptr);
850         
851         memcpy(data.dptr, indata.dptr, indata.dsize);
852         return ctdb_control_release_ip(ctdb, c, data, async_reply);
853 }
854
855
856 static int ctdb_add_public_address(struct ctdb_context *ctdb,
857                                    ctdb_sock_addr *addr,
858                                    unsigned mask, const char *ifaces)
859 {
860         struct ctdb_vnn      *vnn;
861         uint32_t num = 0;
862         char *tmp;
863         const char *iface;
864         int i;
865         int ret;
866
867         /* Verify that we dont have an entry for this ip yet */
868         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
869                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
870                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
871                                 ctdb_addr_to_str(addr)));
872                         return -1;
873                 }               
874         }
875
876         /* create a new vnn structure for this ip address */
877         vnn = talloc_zero(ctdb, struct ctdb_vnn);
878         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
879         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
880         tmp = talloc_strdup(vnn, ifaces);
881         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
882         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
883                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
884                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
885                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
886                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
887                 num++;
888         }
889         talloc_free(tmp);
890         vnn->ifaces[num] = NULL;
891         vnn->public_address      = *addr;
892         vnn->public_netmask_bits = mask;
893         vnn->pnn                 = -1;
894         if (ctdb_sys_have_ip(addr)) {
895                 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
896                 vnn->pnn = ctdb->pnn;
897         }
898
899         for (i=0; vnn->ifaces[i]; i++) {
900                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
901                 if (ret != 0) {
902                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
903                                            "for public_address[%s]\n",
904                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
905                         talloc_free(vnn);
906                         return -1;
907                 }
908                 if (i == 0) {
909                         vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
910                 }
911         }
912
913         DLIST_ADD(ctdb->vnn, vnn);
914
915         return 0;
916 }
917
/*
  setup the event script directory

  A copy of 'script_dir', made with talloc_strdup() on the ctdb
  context, is stored in ctdb->event_script_dir.
  Returns 0 once the copy has been stored.
  NOTE(review): CTDB_NO_MEMORY presumably returns an error when the
  copy could not be allocated - confirm against its definition.
*/
int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
{
        ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
        CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
        return 0;
}
927
928 /*
929   setup the public address lists from a file
930 */
931 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
932 {
933         char **lines;
934         int nlines;
935         int i;
936
937         lines = file_lines_load(alist, &nlines, ctdb);
938         if (lines == NULL) {
939                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
940                 return -1;
941         }
942         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
943                 nlines--;
944         }
945
946         for (i=0;i<nlines;i++) {
947                 unsigned mask;
948                 ctdb_sock_addr addr;
949                 const char *addrstr;
950                 const char *ifaces;
951                 char *tok, *line;
952
953                 line = lines[i];
954                 while ((*line == ' ') || (*line == '\t')) {
955                         line++;
956                 }
957                 if (*line == '#') {
958                         continue;
959                 }
960                 if (strcmp(line, "") == 0) {
961                         continue;
962                 }
963                 tok = strtok(line, " \t");
964                 addrstr = tok;
965                 tok = strtok(NULL, " \t");
966                 if (tok == NULL) {
967                         if (NULL == ctdb->default_public_interface) {
968                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
969                                          i+1));
970                                 talloc_free(lines);
971                                 return -1;
972                         }
973                         ifaces = ctdb->default_public_interface;
974                 } else {
975                         ifaces = tok;
976                 }
977
978                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
979                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
980                         talloc_free(lines);
981                         return -1;
982                 }
983                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
984                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
985                         talloc_free(lines);
986                         return -1;
987                 }
988         }
989
990         talloc_free(lines);
991         return 0;
992 }
993
994 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
995                               const char *iface,
996                               const char *ip)
997 {
998         struct ctdb_vnn *svnn;
999         struct ctdb_iface *cur = NULL;
1000         bool ok;
1001         int ret;
1002
1003         svnn = talloc_zero(ctdb, struct ctdb_vnn);
1004         CTDB_NO_MEMORY(ctdb, svnn);
1005
1006         svnn->ifaces = talloc_array(svnn, const char *, 2);
1007         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1008         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1009         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1010         svnn->ifaces[1] = NULL;
1011
1012         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1013         if (!ok) {
1014                 talloc_free(svnn);
1015                 return -1;
1016         }
1017
1018         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1019         if (ret != 0) {
1020                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1021                                    "for single_ip[%s]\n",
1022                                    svnn->ifaces[0],
1023                                    ctdb_addr_to_str(&svnn->public_address)));
1024                 talloc_free(svnn);
1025                 return -1;
1026         }
1027
1028         /* assume the single public ip interface is initially "good" */
1029         cur = ctdb_find_iface(ctdb, iface);
1030         if (cur == NULL) {
1031                 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1032                 return -1;
1033         }
1034         cur->link_up = true;
1035
1036         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1037         if (ret != 0) {
1038                 talloc_free(svnn);
1039                 return -1;
1040         }
1041
1042         ctdb->single_ip_vnn = svnn;
1043         return 0;
1044 }
1045
/* one public address in the merged list of public addresses built by
   create_merged_ip_list()
*/
struct ctdb_public_ip_list {
        /* next entry of the singly linked list, NULL at the end */
        struct ctdb_public_ip_list *next;
        /* node the address is assigned to; the code in this file
           compares it against -1 to mean "not assigned" */
        uint32_t pnn;
        /* the public address itself */
        ctdb_sock_addr addr;
};
1051
1052
1053 /* Given a physical node, return the number of
1054    public addresses that is currently assigned to this node.
1055 */
1056 static int node_ip_coverage(struct ctdb_context *ctdb, 
1057         int32_t pnn,
1058         struct ctdb_public_ip_list *ips)
1059 {
1060         int num=0;
1061
1062         for (;ips;ips=ips->next) {
1063                 if (ips->pnn == pnn) {
1064                         num++;
1065                 }
1066         }
1067         return num;
1068 }
1069
1070
1071 /* Check if this is a public ip known to the node, i.e. can that
1072    node takeover this ip ?
1073 */
1074 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1075                 struct ctdb_public_ip_list *ip)
1076 {
1077         struct ctdb_all_public_ips *public_ips;
1078         int i;
1079
1080         public_ips = ctdb->nodes[pnn]->available_public_ips;
1081
1082         if (public_ips == NULL) {
1083                 return -1;
1084         }
1085
1086         for (i=0;i<public_ips->num;i++) {
1087                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1088                         /* yes, this node can serve this public ip */
1089                         return 0;
1090                 }
1091         }
1092
1093         return -1;
1094 }
1095
1096
1097 /* search the node lists list for a node to takeover this ip.
1098    pick the node that currently are serving the least number of ips
1099    so that the ips get spread out evenly.
1100 */
1101 static int find_takeover_node(struct ctdb_context *ctdb, 
1102                 struct ctdb_node_map *nodemap, uint32_t mask, 
1103                 struct ctdb_public_ip_list *ip,
1104                 struct ctdb_public_ip_list *all_ips)
1105 {
1106         int pnn, min=0, num;
1107         int i;
1108
1109         pnn    = -1;
1110         for (i=0;i<nodemap->num;i++) {
1111                 if (nodemap->nodes[i].flags & mask) {
1112                         /* This node is not healty and can not be used to serve
1113                            a public address 
1114                         */
1115                         continue;
1116                 }
1117
1118                 /* verify that this node can serve this ip */
1119                 if (can_node_serve_ip(ctdb, i, ip)) {
1120                         /* no it couldnt   so skip to the next node */
1121                         continue;
1122                 }
1123
1124                 num = node_ip_coverage(ctdb, i, all_ips);
1125                 /* was this the first node we checked ? */
1126                 if (pnn == -1) {
1127                         pnn = i;
1128                         min  = num;
1129                 } else {
1130                         if (num < min) {
1131                                 pnn = i;
1132                                 min  = num;
1133                         }
1134                 }
1135         }       
1136         if (pnn == -1) {
1137                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1138                         ctdb_addr_to_str(&ip->addr)));
1139
1140                 return -1;
1141         }
1142
1143         ip->pnn = pnn;
1144         return 0;
1145 }
1146
1147 #define IP_KEYLEN       4
1148 static uint32_t *ip_key(ctdb_sock_addr *ip)
1149 {
1150         static uint32_t key[IP_KEYLEN];
1151
1152         bzero(key, sizeof(key));
1153
1154         switch (ip->sa.sa_family) {
1155         case AF_INET:
1156                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1157                 break;
1158         case AF_INET6:
1159                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1160                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1161                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1162                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1163                 break;
1164         default:
1165                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1166                 return key;
1167         }
1168
1169         return key;
1170 }
1171
1172 static void *add_ip_callback(void *parm, void *data)
1173 {
1174         struct ctdb_public_ip_list *this_ip = parm; 
1175         struct ctdb_public_ip_list *prev_ip = data; 
1176
1177         if (prev_ip == NULL) {
1178                 return parm;
1179         }
1180         if (this_ip->pnn == -1) {
1181                 this_ip->pnn = prev_ip->pnn;
1182         }
1183
1184         return parm;
1185 }
1186
1187 void getips_count_callback(void *param, void *data)
1188 {
1189         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1190         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1191
1192         new_ip->next = *ip_list;
1193         *ip_list     = new_ip;
1194 }
1195
1196 static struct ctdb_public_ip_list *
1197 create_merged_ip_list(struct ctdb_context *ctdb)
1198 {
1199         int i, j;
1200         struct ctdb_public_ip_list *ip_list;
1201         struct ctdb_all_public_ips *public_ips;
1202
1203         if (ctdb->ip_tree != NULL) {
1204                 talloc_free(ctdb->ip_tree);
1205                 ctdb->ip_tree = NULL;
1206         }
1207         ctdb->ip_tree = trbt_create(ctdb, 0);
1208
1209         for (i=0;i<ctdb->num_nodes;i++) {
1210                 public_ips = ctdb->nodes[i]->known_public_ips;
1211
1212                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1213                         continue;
1214                 }
1215
1216                 /* there were no public ips for this node */
1217                 if (public_ips == NULL) {
1218                         continue;
1219                 }               
1220
1221                 for (j=0;j<public_ips->num;j++) {
1222                         struct ctdb_public_ip_list *tmp_ip; 
1223
1224                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1225                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1226                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1227                         tmp_ip->addr = public_ips->ips[j].addr;
1228                         tmp_ip->next = NULL;
1229
1230                         trbt_insertarray32_callback(ctdb->ip_tree,
1231                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1232                                 add_ip_callback,
1233                                 tmp_ip);
1234                 }
1235         }
1236
1237         ip_list = NULL;
1238         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1239
1240         return ip_list;
1241 }
1242
/*
  make any IP alias changes for public addresses that are necessary 

  Unless the disable_ip_failover tunable is set, this
   - builds the merged list of all public addresses,
   - unassigns addresses whose node is masked out or can not serve them,
   - finds a node for every unassigned address,
   - rebalances the addresses between the nodes (skipped when the
     no_ip_failback tunable is 1, and no address is moved when the
     deterministic_public_ips tunable is 1),
   - sends a RELEASE_IP/RELEASE_IPv4 control to every node that is not
     flagged as disconnected, for each address it is not assigned,
   - sends a TAKEOVER_IP/TAKEOVER_IPv4 control to the node that each
     address is assigned to.
  In all cases a RUN_EVENTSCRIPTS control carrying "ipreallocated" is
  sent to the nodes returned by list_of_connected_nodes() at the end.

  Returns -1 if sending or waiting for the release/takeover controls
  fails, otherwise 0. A failure of the final "ipreallocated" control is
  only logged.
 */
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
        int i, num_healthy, retries;
        struct ctdb_public_ip ip;
        struct ctdb_public_ipv4 ipv4;
        uint32_t mask, *nodes;
        struct ctdb_public_ip_list *all_ips, *tmp_ip;
        int maxnode, maxnum=0, minnode, minnum=0, num;
        TDB_DATA data;
        struct timeval timeout;
        struct client_async_data *async_data;
        struct ctdb_client_control_state *state;
        /* everything allocated below hangs off this context, it is
           freed on every return path */
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);

        /*
         * ip failover is completely disabled, just send out the 
         * ipreallocated event.
         */
        if (ctdb->tunable.disable_ip_failover != 0) {
                goto ipreallocated;
        }

        ZERO_STRUCT(ip);

        /* Count how many completely healthy nodes we have */
        num_healthy = 0;
        for (i=0;i<nodemap->num;i++) {
                if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
                        num_healthy++;
                }
        }

        if (num_healthy > 0) {
                /* We have healthy nodes, so only consider them for 
                   serving public addresses
                */
                mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
        } else {
                /* We didn't have any completely healthy nodes so
                   use "disabled" nodes as a fallback
                */
                mask = NODE_FLAGS_INACTIVE;
        }

        /* since nodes only know about those public addresses that
           can be served by that particular node, no single node has
           a full list of all public addresses that exist in the cluster.
           Walk over all node structures and create a merged list of
           all public addresses that exist in the cluster.

           keep the tree of ips around as ctdb->ip_tree
        */
        all_ips = create_merged_ip_list(ctdb);

        /* If we want deterministic ip allocations, i.e. that the ip addresses
           will always be allocated the same way for a specific set of
           available/unavailable nodes, assign the addresses round robin
           by their position in the merged list.
        */
        if (1 == ctdb->tunable.deterministic_public_ips) {              
                DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
                for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
                        tmp_ip->pnn = i%nodemap->num;
                }
        }


        /* mark all public addresses with a masked node as being served by
           node -1
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }
                if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
                        tmp_ip->pnn = -1;
                }
        }

        /* verify that the assigned nodes can serve that public ip
           and set it to -1 if not
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }
                if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
                        /* this node can not serve this ip. */
                        tmp_ip->pnn = -1;
                }
        }


        /* now we must redistribute all public addresses with takeover node
           -1 among the nodes available
        */
        retries = 0;
try_again:
        /* loop over all ip's and find a physical node to cover for 
           each unassigned ip.
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
                                DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
                                        ctdb_addr_to_str(&tmp_ip->addr)));
                        }
                }
        }

        /* If we don't want ips to fail back after a node becomes healthy
           again, we won't even try to reallocate the ip addresses so that
           they are evenly spread out.
           This can NOT be used at the same time as DeterministicIPs !
        */
        if (1 == ctdb->tunable.no_ip_failback) {
                if (1 == ctdb->tunable.deterministic_public_ips) {
                        DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
                }
                goto finished;
        }


        /* now, try to make sure the ip addresses are evenly distributed
           across the nodes.
           for each ip address, loop over all nodes that can serve this
           ip and make sure that the difference between the node
           serving the most and the node serving the least ip's is not
           greater than 1.
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }

                /* Get the highest and lowest number of ip's served by any 
                   valid node which can serve this ip.
                */
                maxnode = -1;
                minnode = -1;
                for (i=0;i<nodemap->num;i++) {
                        if (nodemap->nodes[i].flags & mask) {
                                continue;
                        }

                        /* only check nodes that can actually serve this ip */
                        if (can_node_serve_ip(ctdb, i, tmp_ip)) {
                                /* no it couldn't, so skip to the next node */
                                continue;
                        }

                        num = node_ip_coverage(ctdb, i, all_ips);
                        if (maxnode == -1) {
                                maxnode = i;
                                maxnum  = num;
                        } else {
                                if (num > maxnum) {
                                        maxnode = i;
                                        maxnum  = num;
                                }
                        }
                        if (minnode == -1) {
                                minnode = i;
                                minnum  = num;
                        } else {
                                if (num < minnum) {
                                        minnode = i;
                                        minnum  = num;
                                }
                        }
                }
                if (maxnode == -1) {
                        DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
                                ctdb_addr_to_str(&tmp_ip->addr)));

                        continue;
                }

                /* If we want deterministic IPs then don't try to reallocate 
                   them to spread out the load.
                */
                if (1 == ctdb->tunable.deterministic_public_ips) {
                        continue;
                }

                /* if the spread between the smallest and largest coverage by
                   a node is >=2 we steal one of the ips from the node with
                   most coverage to even things out a bit.
                   try to do this at most 5 times since we don't want to spend
                   too much time balancing the ip coverage.
                */
                if ( (maxnum > minnum+1)
                  && (retries < 5) ){
                        struct ctdb_public_ip_list *tmp;

                        /* mark one of maxnode's vnn's as unassigned and try
                           again
                        */
                        for (tmp=all_ips;tmp;tmp=tmp->next) {
                                if (tmp->pnn == maxnode) {
                                        tmp->pnn = -1;
                                        retries++;
                                        goto try_again;
                                }
                        }
                }
        }


        /* finished distributing the public addresses, now just send the 
           info out to the nodes
        */
finished:

        /* at this point ->pnn is the node which will own each IP
           or -1 if there is no node that can cover this ip
        */

        /* now tell all nodes to delete any alias that they should not
           have.  This will be a NOOP on nodes that don't currently
           hold the given alias */
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);

        for (i=0;i<nodemap->num;i++) {
                /* don't talk to unconnected nodes, but do talk to banned nodes */
                if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
                        continue;
                }

                for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                        if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
                                /* This node should be serving this
                                   vnn so don't tell it to release the ip
                                */
                                continue;
                        }
                        /* IPv4 addresses are sent with the old v4 style
                           control, everything else with the new one */
                        if (tmp_ip->addr.sa.sa_family == AF_INET) {
                                ipv4.pnn = tmp_ip->pnn;
                                ipv4.sin = tmp_ip->addr.ip;

                                timeout = TAKEOVER_TIMEOUT();
                                data.dsize = sizeof(ipv4);
                                data.dptr  = (uint8_t *)&ipv4;
                                state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
                                                0, CTDB_CONTROL_RELEASE_IPv4, 0,
                                                data, async_data,
                                                &timeout, NULL);
                        } else {
                                ip.pnn  = tmp_ip->pnn;
                                ip.addr = tmp_ip->addr;

                                timeout = TAKEOVER_TIMEOUT();
                                data.dsize = sizeof(ip);
                                data.dptr  = (uint8_t *)&ip;
                                state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
                                                0, CTDB_CONTROL_RELEASE_IP, 0,
                                                data, async_data,
                                                &timeout, NULL);
                        }

                        if (state == NULL) {
                                DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
                                talloc_free(tmp_ctx);
                                return -1;
                        }
                
                        ctdb_client_async_add(async_data, state);
                }
        }
        /* wait for all release controls before any takeover is sent */
        if (ctdb_client_async_wait(ctdb, async_data) != 0) {
                DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
                talloc_free(tmp_ctx);
                return -1;
        }
        talloc_free(async_data);


        /* tell all nodes to get their own IPs */
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        /* this IP won't be taken over */
                        continue;
                }

                if (tmp_ip->addr.sa.sa_family == AF_INET) {
                        ipv4.pnn = tmp_ip->pnn;
                        ipv4.sin = tmp_ip->addr.ip;

                        timeout = TAKEOVER_TIMEOUT();
                        data.dsize = sizeof(ipv4);
                        data.dptr  = (uint8_t *)&ipv4;
                        state = ctdb_control_send(ctdb, tmp_ip->pnn,
                                        0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
                                        data, async_data,
                                        &timeout, NULL);
                } else {
                        ip.pnn  = tmp_ip->pnn;
                        ip.addr = tmp_ip->addr;

                        timeout = TAKEOVER_TIMEOUT();
                        data.dsize = sizeof(ip);
                        data.dptr  = (uint8_t *)&ip;
                        state = ctdb_control_send(ctdb, tmp_ip->pnn,
                                        0, CTDB_CONTROL_TAKEOVER_IP, 0,
                                        data, async_data,
                                        &timeout, NULL);
                }
                if (state == NULL) {
                        DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
                        talloc_free(tmp_ctx);
                        return -1;
                }
                
                ctdb_client_async_add(async_data, state);
        }
        if (ctdb_client_async_wait(ctdb, async_data) != 0) {
                DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
                talloc_free(tmp_ctx);
                return -1;
        }

ipreallocated:
        /* tell all nodes to update natgw */
        /* run the "ipreallocated" event on all connected nodes;
           a failure here is logged but does not fail the takeover run */
        data.dptr  = discard_const("ipreallocated");
        data.dsize = strlen((char *)data.dptr) + 1; 
        nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
        if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
                                      nodes, 0, TAKEOVER_TIMEOUT(),
                                      false, data,
                                      NULL, NULL,
                                      NULL) != 0) {
                DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
        }

        talloc_free(tmp_ctx);
        return 0;
}
1586
1587
/*
  destroy a ctdb_client_ip structure

  Logs the address, port and client id of the entry at debug level
  and unlinks it from ip->ctdb->client_ip_list. Always returns 0.
  NOTE(review): presumably installed as a talloc destructor - confirm
  where the structure is allocated.
 */
static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
{
        DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
                ctdb_addr_to_str(&ip->addr),
                ntohs(ip->addr.ip.sin_port),
                ip->client_id));

        DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
        return 0;
}
1601
1602 /*
1603   called by a client to inform us of a TCP connection that it is managing
1604   that should tickled with an ACK when IP takeover is done
1605   we handle both the old ipv4 style of packets as well as the new ipv4/6
1606   pdus.
1607  */
1608 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1609                                 TDB_DATA indata)
1610 {
1611         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1612         struct ctdb_control_tcp *old_addr = NULL;
1613         struct ctdb_control_tcp_addr new_addr;
1614         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1615         struct ctdb_tcp_list *tcp;
1616         struct ctdb_tcp_connection t;
1617         int ret;
1618         TDB_DATA data;
1619         struct ctdb_client_ip *ip;
1620         struct ctdb_vnn *vnn;
1621         ctdb_sock_addr addr;
1622
1623         switch (indata.dsize) {
1624         case sizeof(struct ctdb_control_tcp):
1625                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1626                 ZERO_STRUCT(new_addr);
1627                 tcp_sock = &new_addr;
1628                 tcp_sock->src.ip  = old_addr->src;
1629                 tcp_sock->dest.ip = old_addr->dest;
1630                 break;
1631         case sizeof(struct ctdb_control_tcp_addr):
1632                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1633                 break;
1634         default:
1635                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1636                                  "to ctdb_control_tcp_client. size was %d but "
1637                                  "only allowed sizes are %lu and %lu\n",
1638                                  (int)indata.dsize,
1639                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1640                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1641                 return -1;
1642         }
1643
1644         addr = tcp_sock->src;
1645         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1646         addr = tcp_sock->dest;
1647         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1648
1649         ZERO_STRUCT(addr);
1650         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1651         vnn = find_public_ip_vnn(ctdb, &addr);
1652         if (vnn == NULL) {
1653                 switch (addr.sa.sa_family) {
1654                 case AF_INET:
1655                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1656                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1657                                         ctdb_addr_to_str(&addr)));
1658                         }
1659                         break;
1660                 case AF_INET6:
1661                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1662                                 ctdb_addr_to_str(&addr)));
1663                         break;
1664                 default:
1665                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1666                 }
1667
1668                 return 0;
1669         }
1670
1671         if (vnn->pnn != ctdb->pnn) {
1672                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1673                         ctdb_addr_to_str(&addr),
1674                         client_id, client->pid));
1675                 /* failing this call will tell smbd to die */
1676                 return -1;
1677         }
1678
1679         ip = talloc(client, struct ctdb_client_ip);
1680         CTDB_NO_MEMORY(ctdb, ip);
1681
1682         ip->ctdb      = ctdb;
1683         ip->addr      = addr;
1684         ip->client_id = client_id;
1685         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1686         DLIST_ADD(ctdb->client_ip_list, ip);
1687
1688         tcp = talloc(client, struct ctdb_tcp_list);
1689         CTDB_NO_MEMORY(ctdb, tcp);
1690
1691         tcp->connection.src_addr = tcp_sock->src;
1692         tcp->connection.dst_addr = tcp_sock->dest;
1693
1694         DLIST_ADD(client->tcp_list, tcp);
1695
1696         t.src_addr = tcp_sock->src;
1697         t.dst_addr = tcp_sock->dest;
1698
1699         data.dptr = (uint8_t *)&t;
1700         data.dsize = sizeof(t);
1701
1702         switch (addr.sa.sa_family) {
1703         case AF_INET:
1704                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1705                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1706                         ctdb_addr_to_str(&tcp_sock->src),
1707                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1708                 break;
1709         case AF_INET6:
1710                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1711                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1712                         ctdb_addr_to_str(&tcp_sock->src),
1713                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1714                 break;
1715         default:
1716                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1717         }
1718
1719
1720         /* tell all nodes about this tcp connection */
1721         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1722                                        CTDB_CONTROL_TCP_ADD,
1723                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1724         if (ret != 0) {
1725                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1726                 return -1;
1727         }
1728
1729         return 0;
1730 }
1731
1732 /*
1733   find a tcp address on a list
1734  */
1735 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1736                                            struct ctdb_tcp_connection *tcp)
1737 {
1738         int i;
1739
1740         if (array == NULL) {
1741                 return NULL;
1742         }
1743
1744         for (i=0;i<array->num;i++) {
1745                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1746                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1747                         return &array->connections[i];
1748                 }
1749         }
1750         return NULL;
1751 }
1752
1753
1754
1755 /*
1756   called by a daemon to inform us of a TCP connection that one of its
1757   clients managing that should tickled with an ACK when IP takeover is
1758   done
1759  */
1760 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1761 {
1762         struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1763         struct ctdb_tcp_array *tcparray;
1764         struct ctdb_tcp_connection tcp;
1765         struct ctdb_vnn *vnn;
1766
1767         vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1768         if (vnn == NULL) {
1769                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1770                         ctdb_addr_to_str(&p->dst_addr)));
1771
1772                 return -1;
1773         }
1774
1775
1776         tcparray = vnn->tcp_array;
1777
1778         /* If this is the first tickle */
1779         if (tcparray == NULL) {
1780                 tcparray = talloc_size(ctdb->nodes, 
1781                         offsetof(struct ctdb_tcp_array, connections) +
1782                         sizeof(struct ctdb_tcp_connection) * 1);
1783                 CTDB_NO_MEMORY(ctdb, tcparray);
1784                 vnn->tcp_array = tcparray;
1785
1786                 tcparray->num = 0;
1787                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1788                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1789
1790                 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1791                 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1792                 tcparray->num++;
1793
1794                 if (tcp_update_needed) {
1795                         vnn->tcp_update_needed = true;
1796                 }
1797                 return 0;
1798         }
1799
1800
1801         /* Do we already have this tickle ?*/
1802         tcp.src_addr = p->src_addr;
1803         tcp.dst_addr = p->dst_addr;
1804         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1805                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1806                         ctdb_addr_to_str(&tcp.dst_addr),
1807                         ntohs(tcp.dst_addr.ip.sin_port),
1808                         vnn->pnn));
1809                 return 0;
1810         }
1811
1812         /* A new tickle, we must add it to the array */
1813         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1814                                         struct ctdb_tcp_connection,
1815                                         tcparray->num+1);
1816         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1817
1818         vnn->tcp_array = tcparray;
1819         tcparray->connections[tcparray->num].src_addr = p->src_addr;
1820         tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1821         tcparray->num++;
1822                                 
1823         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1824                 ctdb_addr_to_str(&tcp.dst_addr),
1825                 ntohs(tcp.dst_addr.ip.sin_port),
1826                 vnn->pnn));
1827
1828         if (tcp_update_needed) {
1829                 vnn->tcp_update_needed = true;
1830         }
1831
1832         return 0;
1833 }
1834
1835
1836 /*
1837   called by a daemon to inform us of a TCP connection that one of its
1838   clients managing that should tickled with an ACK when IP takeover is
1839   done
1840  */
1841 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1842 {
1843         struct ctdb_tcp_connection *tcpp;
1844         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1845
1846         if (vnn == NULL) {
1847                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1848                         ctdb_addr_to_str(&conn->dst_addr)));
1849                 return;
1850         }
1851
1852         /* if the array is empty we cant remove it
1853            and we dont need to do anything
1854          */
1855         if (vnn->tcp_array == NULL) {
1856                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1857                         ctdb_addr_to_str(&conn->dst_addr),
1858                         ntohs(conn->dst_addr.ip.sin_port)));
1859                 return;
1860         }
1861
1862
1863         /* See if we know this connection
1864            if we dont know this connection  then we dont need to do anything
1865          */
1866         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1867         if (tcpp == NULL) {
1868                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1869                         ctdb_addr_to_str(&conn->dst_addr),
1870                         ntohs(conn->dst_addr.ip.sin_port)));
1871                 return;
1872         }
1873
1874
1875         /* We need to remove this entry from the array.
1876            Instead of allocating a new array and copying data to it
1877            we cheat and just copy the last entry in the existing array
1878            to the entry that is to be removed and just shring the 
1879            ->num field
1880          */
1881         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1882         vnn->tcp_array->num--;
1883
1884         /* If we deleted the last entry we also need to remove the entire array
1885          */
1886         if (vnn->tcp_array->num == 0) {
1887                 talloc_free(vnn->tcp_array);
1888                 vnn->tcp_array = NULL;
1889         }               
1890
1891         vnn->tcp_update_needed = true;
1892
1893         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1894                 ctdb_addr_to_str(&conn->src_addr),
1895                 ntohs(conn->src_addr.ip.sin_port)));
1896 }
1897
1898
1899 /*
1900   called by a daemon to inform us of a TCP connection that one of its
1901   clients used are no longer needed in the tickle database
1902  */
1903 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1904 {
1905         struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1906
1907         ctdb_remove_tcp_connection(ctdb, conn);
1908
1909         return 0;
1910 }
1911
1912
/*
  called when a daemon restarts.  The intent is to send all tickles for
  all public addresses we are serving to the new node immediately.

  XXX not implemented yet: both arguments are ignored and 0 is
  returned unconditionally.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        (void)ctdb;
        (void)vnn;

        return 0;
}
1922
1923
1924 /*
1925   called when a client structure goes away - hook to remove
1926   elements from the tcp_list in all daemons
1927  */
1928 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1929 {
1930         while (client->tcp_list) {
1931                 struct ctdb_tcp_list *tcp = client->tcp_list;
1932                 DLIST_REMOVE(client->tcp_list, tcp);
1933                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1934         }
1935 }
1936
1937
1938 /*
1939   release all IPs on shutdown
1940  */
1941 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1942 {
1943         struct ctdb_vnn *vnn;
1944
1945         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1946                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1947                         ctdb_vnn_unassign_iface(ctdb, vnn);
1948                         continue;
1949                 }
1950                 if (!vnn->iface) {
1951                         continue;
1952                 }
1953                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1954                                   ctdb_vnn_iface_string(vnn),
1955                                   ctdb_addr_to_str(&vnn->public_address),
1956                                   vnn->public_netmask_bits);
1957                 release_kill_clients(ctdb, &vnn->public_address);
1958                 ctdb_vnn_unassign_iface(ctdb, vnn);
1959         }
1960 }
1961
1962
1963 /*
1964   get list of public IPs
1965  */
1966 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1967                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1968 {
1969         int i, num, len;
1970         struct ctdb_all_public_ips *ips;
1971         struct ctdb_vnn *vnn;
1972         bool only_available = false;
1973
1974         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1975                 only_available = true;
1976         }
1977
1978         /* count how many public ip structures we have */
1979         num = 0;
1980         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1981                 num++;
1982         }
1983
1984         len = offsetof(struct ctdb_all_public_ips, ips) + 
1985                 num*sizeof(struct ctdb_public_ip);
1986         ips = talloc_zero_size(outdata, len);
1987         CTDB_NO_MEMORY(ctdb, ips);
1988
1989         i = 0;
1990         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1991                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1992                         continue;
1993                 }
1994                 ips->ips[i].pnn  = vnn->pnn;
1995                 ips->ips[i].addr = vnn->public_address;
1996                 i++;
1997         }
1998         ips->num = i;
1999         len = offsetof(struct ctdb_all_public_ips, ips) +
2000                 i*sizeof(struct ctdb_public_ip);
2001
2002         outdata->dsize = len;
2003         outdata->dptr  = (uint8_t *)ips;
2004
2005         return 0;
2006 }
2007
2008
2009 /*
2010   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
2011  */
2012 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
2013                                     struct ctdb_req_control *c, TDB_DATA *outdata)
2014 {
2015         int i, num, len;
2016         struct ctdb_all_public_ipsv4 *ips;
2017         struct ctdb_vnn *vnn;
2018
2019         /* count how many public ip structures we have */
2020         num = 0;
2021         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2022                 if (vnn->public_address.sa.sa_family != AF_INET) {
2023                         continue;
2024                 }
2025                 num++;
2026         }
2027
2028         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
2029                 num*sizeof(struct ctdb_public_ipv4);
2030         ips = talloc_zero_size(outdata, len);
2031         CTDB_NO_MEMORY(ctdb, ips);
2032
2033         outdata->dsize = len;
2034         outdata->dptr  = (uint8_t *)ips;
2035
2036         ips->num = num;
2037         i = 0;
2038         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2039                 if (vnn->public_address.sa.sa_family != AF_INET) {
2040                         continue;
2041                 }
2042                 ips->ips[i].pnn = vnn->pnn;
2043                 ips->ips[i].sin = vnn->public_address.ip;
2044                 i++;
2045         }
2046
2047         return 0;
2048 }
2049
2050 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2051                                         struct ctdb_req_control *c,
2052                                         TDB_DATA indata,
2053                                         TDB_DATA *outdata)
2054 {
2055         int i, num, len;
2056         ctdb_sock_addr *addr;
2057         struct ctdb_control_public_ip_info *info;
2058         struct ctdb_vnn *vnn;
2059
2060         addr = (ctdb_sock_addr *)indata.dptr;
2061
2062         vnn = find_public_ip_vnn(ctdb, addr);
2063         if (vnn == NULL) {
2064                 /* if it is not a public ip   it could be our 'single ip' */
2065                 if (ctdb->single_ip_vnn) {
2066                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2067                                 vnn = ctdb->single_ip_vnn;
2068                         }
2069                 }
2070         }
2071         if (vnn == NULL) {
2072                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2073                                  "'%s'not a public address\n",
2074                                  ctdb_addr_to_str(addr)));
2075                 return -1;
2076         }
2077
2078         /* count how many public ip structures we have */
2079         num = 0;
2080         for (;vnn->ifaces[num];) {
2081                 num++;
2082         }
2083
2084         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2085                 num*sizeof(struct ctdb_control_iface_info);
2086         info = talloc_zero_size(outdata, len);
2087         CTDB_NO_MEMORY(ctdb, info);
2088
2089         info->ip.addr = vnn->public_address;
2090         info->ip.pnn = vnn->pnn;
2091         info->active_idx = 0xFFFFFFFF;
2092
2093         for (i=0; vnn->ifaces[i]; i++) {
2094                 struct ctdb_iface *cur;
2095
2096                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2097                 if (cur == NULL) {
2098                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2099                                            vnn->ifaces[i]));
2100                         return -1;
2101                 }
2102                 if (vnn->iface == cur) {
2103                         info->active_idx = i;
2104                 }
2105                 strcpy(info->ifaces[i].name, cur->name);
2106                 info->ifaces[i].link_state = cur->link_up;
2107                 info->ifaces[i].references = cur->references;
2108         }
2109         info->num = i;
2110         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2111                 i*sizeof(struct ctdb_control_iface_info);
2112
2113         outdata->dsize = len;
2114         outdata->dptr  = (uint8_t *)info;
2115
2116         return 0;
2117 }
2118
2119 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2120                                 struct ctdb_req_control *c,
2121                                 TDB_DATA *outdata)
2122 {
2123         int i, num, len;
2124         struct ctdb_control_get_ifaces *ifaces;
2125         struct ctdb_iface *cur;
2126
2127         /* count how many public ip structures we have */
2128         num = 0;
2129         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2130                 num++;
2131         }
2132
2133         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2134                 num*sizeof(struct ctdb_control_iface_info);
2135         ifaces = talloc_zero_size(outdata, len);
2136         CTDB_NO_MEMORY(ctdb, ifaces);
2137
2138         i = 0;
2139         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2140                 strcpy(ifaces->ifaces[i].name, cur->name);
2141                 ifaces->ifaces[i].link_state = cur->link_up;
2142                 ifaces->ifaces[i].references = cur->references;
2143                 i++;
2144         }
2145         ifaces->num = i;
2146         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2147                 i*sizeof(struct ctdb_control_iface_info);
2148
2149         outdata->dsize = len;
2150         outdata->dptr  = (uint8_t *)ifaces;
2151
2152         return 0;
2153 }
2154
2155 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2156                                     struct ctdb_req_control *c,
2157                                     TDB_DATA indata)
2158 {
2159         struct ctdb_control_iface_info *info;
2160         struct ctdb_iface *iface;
2161         bool link_up = false;
2162
2163         info = (struct ctdb_control_iface_info *)indata.dptr;
2164
2165         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2166                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2167                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2168                                   len, len, info->name));
2169                 return -1;
2170         }
2171
2172         switch (info->link_state) {
2173         case 0:
2174                 link_up = false;
2175                 break;
2176         case 1:
2177                 link_up = true;
2178                 break;
2179         default:
2180                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2181                                   (unsigned int)info->link_state));
2182                 return -1;
2183         }
2184
2185         if (info->references != 0) {
2186                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2187                                   (unsigned int)info->references));
2188                 return -1;
2189         }
2190
2191         iface = ctdb_find_iface(ctdb, info->name);
2192         if (iface == NULL) {
2193                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2194                                   info->name));
2195                 return -1;
2196         }
2197
2198         if (link_up == iface->link_up) {
2199                 return 0;
2200         }
2201
2202         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2203               ("iface[%s] has changed it's link status %s => %s\n",
2204                iface->name,
2205                iface->link_up?"up":"down",
2206                link_up?"up":"down"));
2207
2208         iface->link_up = link_up;
2209         return 0;
2210 }
2211
2212
/* 
   structure containing the listening socket and the list of tcp connections
   that the ctdb daemon is to kill.
   One such structure hangs off a public address (vnn->killtcp) while
   there are connections left to kill for it.
*/
struct ctdb_kill_tcp {
        /* public address this state belongs to; the destructor clears
           its ->killtcp pointer */
        struct ctdb_vnn *vnn;
        /* daemon context, used to reach the event context */
        struct ctdb_context *ctdb;
        /* descriptor the tcp packets are read from in
           capture_tcp_handler() */
        int capture_fd;
        /* NOTE(review): presumably the fd event watching capture_fd -
           confirm where it is set up */
        struct fd_event *fde;
        /* tree of struct ctdb_killtcp_con, keyed with killtcp_key() */
        trbt_tree_t *connections;
        /* opaque value passed on to ctdb_sys_read_tcp_packet() */
        void *private_data;
};
2225
/*
  a tcp connection that is to be killed
 */
struct ctdb_killtcp_con {
        /* the two endpoints of the connection */
        ctdb_sock_addr src_addr;
        ctdb_sock_addr dst_addr;
        /* number of tickles sent so far; tickle_connection_traverse()
           gives up once this reaches 5 */
        int count;
        /* the kill state this connection is queued on */
        struct ctdb_kill_tcp *killtcp;
};
2235
2236 /* this function is used to create a key to represent this socketpair
2237    in the killtcp tree.
2238    this key is used to insert and lookup matching socketpairs that are
2239    to be tickled and RST
2240 */
2241 #define KILLTCP_KEYLEN  10
2242 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2243 {
2244         static uint32_t key[KILLTCP_KEYLEN];
2245
2246         bzero(key, sizeof(key));
2247
2248         if (src->sa.sa_family != dst->sa.sa_family) {
2249                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2250                 return key;
2251         }
2252         
2253         switch (src->sa.sa_family) {
2254         case AF_INET:
2255                 key[0]  = dst->ip.sin_addr.s_addr;
2256                 key[1]  = src->ip.sin_addr.s_addr;
2257                 key[2]  = dst->ip.sin_port;
2258                 key[3]  = src->ip.sin_port;
2259                 break;
2260         case AF_INET6:
2261                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2262                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2263                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2264                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2265                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2266                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2267                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2268                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2269                 key[8]  = dst->ip6.sin6_port;
2270                 key[9]  = src->ip6.sin6_port;
2271                 break;
2272         default:
2273                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2274                 return key;
2275         }
2276
2277         return key;
2278 }
2279
2280 /*
2281   called when we get a read event on the raw socket
2282  */
2283 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2284                                 uint16_t flags, void *private_data)
2285 {
2286         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2287         struct ctdb_killtcp_con *con;
2288         ctdb_sock_addr src, dst;
2289         uint32_t ack_seq, seq;
2290
2291         if (!(flags & EVENT_FD_READ)) {
2292                 return;
2293         }
2294
2295         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2296                                 killtcp->private_data,
2297                                 &src, &dst,
2298                                 &ack_seq, &seq) != 0) {
2299                 /* probably a non-tcp ACK packet */
2300                 return;
2301         }
2302
2303         /* check if we have this guy in our list of connections
2304            to kill
2305         */
2306         con = trbt_lookuparray32(killtcp->connections, 
2307                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2308         if (con == NULL) {
2309                 /* no this was some other packet we can just ignore */
2310                 return;
2311         }
2312
2313         /* This one has been tickled !
2314            now reset him and remove him from the list.
2315          */
2316         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2317                 ntohs(con->dst_addr.ip.sin_port),
2318                 ctdb_addr_to_str(&con->src_addr),
2319                 ntohs(con->src_addr.ip.sin_port)));
2320
2321         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2322         talloc_free(con);
2323 }
2324
2325
2326 /* when traversing the list of all tcp connections to send tickle acks to
2327    (so that we can capture the ack coming back and kill the connection
2328     by a RST)
2329    this callback is called for each connection we are currently trying to kill
2330 */
2331 static void tickle_connection_traverse(void *param, void *data)
2332 {
2333         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2334
2335         /* have tried too many times, just give up */
2336         if (con->count >= 5) {
2337                 /* can't delete in traverse: reparent to delete_cons */
2338                 talloc_steal(param, con);
2339                 return;
2340         }
2341
2342         /* othervise, try tickling it again */
2343         con->count++;
2344         ctdb_sys_send_tcp(
2345                 (ctdb_sock_addr *)&con->dst_addr,
2346                 (ctdb_sock_addr *)&con->src_addr,
2347                 0, 0, 0);
2348 }
2349
2350
2351 /* 
2352    called every second until all sentenced connections have been reset
2353  */
2354 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2355                                               struct timeval t, void *private_data)
2356 {
2357         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2358         void *delete_cons = talloc_new(NULL);
2359
2360         /* loop over all connections sending tickle ACKs */
2361         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2362
2363         /* now we've finished traverse, it's safe to do deletion. */
2364         talloc_free(delete_cons);
2365
2366         /* If there are no more connections to kill we can remove the
2367            entire killtcp structure
2368          */
2369         if ( (killtcp->connections == NULL) || 
2370              (killtcp->connections->root == NULL) ) {
2371                 talloc_free(killtcp);
2372                 return;
2373         }
2374
2375         /* try tickling them again in a seconds time
2376          */
2377         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2378                         ctdb_tickle_sentenced_connections, killtcp);
2379 }
2380
2381 /*
2382   destroy the killtcp structure
2383  */
2384 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2385 {
2386         if (killtcp->vnn) {
2387                 killtcp->vnn->killtcp = NULL;
2388         }
2389         return 0;
2390 }
2391
2392
/* tree insert callback: always pick the new entry.

   'parm' is the new connection structure and 'data' whatever the tree
   already holds for the key; the old one is ignored and the new one
   returned unconditionally.

   the old one is not even freed here if it did exist, that one is
   talloc_stolen by the same node in the tree anyway and will be
   deleted when the new data is deleted
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        (void)data;

        return replacement;
}
2404
2405 /*
2406   add a tcp socket to the list of connections we want to RST
2407  */
2408 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2409                                        ctdb_sock_addr *s,
2410                                        ctdb_sock_addr *d)
2411 {
2412         ctdb_sock_addr src, dst;
2413         struct ctdb_kill_tcp *killtcp;
2414         struct ctdb_killtcp_con *con;
2415         struct ctdb_vnn *vnn;
2416
2417         ctdb_canonicalize_ip(s, &src);
2418         ctdb_canonicalize_ip(d, &dst);
2419
2420         vnn = find_public_ip_vnn(ctdb, &dst);
2421         if (vnn == NULL) {
2422                 vnn = find_public_ip_vnn(ctdb, &src);
2423         }
2424         if (vnn == NULL) {
2425                 /* if it is not a public ip   it could be our 'single ip' */
2426                 if (ctdb->single_ip_vnn) {
2427                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2428                                 vnn = ctdb->single_ip_vnn;
2429                         }
2430                 }
2431         }
2432         if (vnn == NULL) {
2433                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2434                 return -1;
2435         }
2436
2437         killtcp = vnn->killtcp;
2438         
2439         /* If this is the first connection to kill we must allocate
2440            a new structure
2441          */
2442         if (killtcp == NULL) {
2443                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2444                 CTDB_NO_MEMORY(ctdb, killtcp);
2445
2446                 killtcp->vnn         = vnn;
2447                 killtcp->ctdb        = ctdb;
2448                 killtcp->capture_fd  = -1;
2449                 killtcp->connections = trbt_create(killtcp, 0);
2450
2451                 vnn->killtcp         = killtcp;
2452                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2453         }
2454
2455
2456
2457         /* create a structure that describes this connection we want to
2458            RST and store it in killtcp->connections
2459         */
2460         con = talloc(killtcp, struct ctdb_killtcp_con);
2461         CTDB_NO_MEMORY(ctdb, con);
2462         con->src_addr = src;
2463         con->dst_addr = dst;
2464         con->count    = 0;
2465         con->killtcp  = killtcp;
2466
2467
2468         trbt_insertarray32_callback(killtcp->connections,
2469                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2470                         add_killtcp_callback, con);
2471
2472         /* 
2473            If we dont have a socket to listen on yet we must create it
2474          */
2475         if (killtcp->capture_fd == -1) {
2476                 const char *iface = ctdb_vnn_iface_string(vnn);
2477                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2478                 if (killtcp->capture_fd == -1) {
2479                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2480                                           "socket on iface '%s' for killtcp (%s)\n",
2481                                           iface, strerror(errno)));
2482                         goto failed;
2483                 }
2484         }
2485
2486
2487         if (killtcp->fde == NULL) {
2488                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2489                                             EVENT_FD_READ,
2490                                             capture_tcp_handler, killtcp);
2491                 tevent_fd_set_auto_close(killtcp->fde);
2492
2493                 /* We also need to set up some events to tickle all these connections
2494                    until they are all reset
2495                 */
2496                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2497                                 ctdb_tickle_sentenced_connections, killtcp);
2498         }
2499
2500         /* tickle him once now */
2501         ctdb_sys_send_tcp(
2502                 &con->dst_addr,
2503                 &con->src_addr,
2504                 0, 0, 0);
2505
2506         return 0;
2507
2508 failed:
2509         talloc_free(vnn->killtcp);
2510         vnn->killtcp = NULL;
2511         return -1;
2512 }
2513
2514 /*
2515   kill a TCP connection.
2516  */
2517 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2518 {
2519         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2520
2521         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2522 }
2523
2524 /*
2525   called by a daemon to inform us of the entire list of TCP tickles for
2526   a particular public address.
2527   this control should only be sent by the node that is currently serving
2528   that public address.
2529  */
2530 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2531 {
2532         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2533         struct ctdb_tcp_array *tcparray;
2534         struct ctdb_vnn *vnn;
2535
2536         /* We must at least have tickles.num or else we cant verify the size
2537            of the received data blob
2538          */
2539         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2540                                         tickles.connections)) {
2541                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2542                 return -1;
2543         }
2544
2545         /* verify that the size of data matches what we expect */
2546         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2547                                 tickles.connections)
2548                          + sizeof(struct ctdb_tcp_connection)
2549                                  * list->tickles.num) {
2550                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2551                 return -1;
2552         }       
2553
2554         vnn = find_public_ip_vnn(ctdb, &list->addr);
2555         if (vnn == NULL) {
2556                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2557                         ctdb_addr_to_str(&list->addr)));
2558
2559                 return 1;
2560         }
2561
2562         /* remove any old ticklelist we might have */
2563         talloc_free(vnn->tcp_array);
2564         vnn->tcp_array = NULL;
2565
2566         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2567         CTDB_NO_MEMORY(ctdb, tcparray);
2568
2569         tcparray->num = list->tickles.num;
2570
2571         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2572         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2573
2574         memcpy(tcparray->connections, &list->tickles.connections[0], 
2575                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2576
2577         /* We now have a new fresh tickle list array for this vnn */
2578         vnn->tcp_array = talloc_steal(vnn, tcparray);
2579         
2580         return 0;
2581 }
2582
2583 /*
2584   called to return the full list of tickles for the puclic address associated 
2585   with the provided vnn
2586  */
2587 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2588 {
2589         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2590         struct ctdb_control_tcp_tickle_list *list;
2591         struct ctdb_tcp_array *tcparray;
2592         int num;
2593         struct ctdb_vnn *vnn;
2594
2595         vnn = find_public_ip_vnn(ctdb, addr);
2596         if (vnn == NULL) {
2597                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2598                         ctdb_addr_to_str(addr)));
2599
2600                 return 1;
2601         }
2602
2603         tcparray = vnn->tcp_array;
2604         if (tcparray) {
2605                 num = tcparray->num;
2606         } else {
2607                 num = 0;
2608         }
2609
2610         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2611                                 tickles.connections)
2612                         + sizeof(struct ctdb_tcp_connection) * num;
2613
2614         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2615         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2616         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2617
2618         list->addr = *addr;
2619         list->tickles.num = num;
2620         if (num) {
2621                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2622                         sizeof(struct ctdb_tcp_connection) * num);
2623         }
2624
2625         return 0;
2626 }
2627
2628
2629 /*
2630   set the list of all tcp tickles for a public address
2631  */
2632 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2633                               struct timeval timeout, uint32_t destnode, 
2634                               ctdb_sock_addr *addr,
2635                               struct ctdb_tcp_array *tcparray)
2636 {
2637         int ret, num;
2638         TDB_DATA data;
2639         struct ctdb_control_tcp_tickle_list *list;
2640
2641         if (tcparray) {
2642                 num = tcparray->num;
2643         } else {
2644                 num = 0;
2645         }
2646
2647         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2648                                 tickles.connections) +
2649                         sizeof(struct ctdb_tcp_connection) * num;
2650         data.dptr = talloc_size(ctdb, data.dsize);
2651         CTDB_NO_MEMORY(ctdb, data.dptr);
2652
2653         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2654         list->addr = *addr;
2655         list->tickles.num = num;
2656         if (tcparray) {
2657                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2658         }
2659
2660         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2661                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2662                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2663         if (ret != 0) {
2664                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2665                 return -1;
2666         }
2667
2668         talloc_free(data.dptr);
2669
2670         return ret;
2671 }
2672
2673
2674 /*
2675   perform tickle updates if required
2676  */
2677 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2678                                 struct timed_event *te, 
2679                                 struct timeval t, void *private_data)
2680 {
2681         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2682         int ret;
2683         struct ctdb_vnn *vnn;
2684
2685         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2686                 /* we only send out updates for public addresses that 
2687                    we have taken over
2688                  */
2689                 if (ctdb->pnn != vnn->pnn) {
2690                         continue;
2691                 }
2692                 /* We only send out the updates if we need to */
2693                 if (!vnn->tcp_update_needed) {
2694                         continue;
2695                 }
2696                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2697                                 TAKEOVER_TIMEOUT(),
2698                                 CTDB_BROADCAST_CONNECTED,
2699                                 &vnn->public_address,
2700                                 vnn->tcp_array);
2701                 if (ret != 0) {
2702                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2703                                 ctdb_addr_to_str(&vnn->public_address)));
2704                 }
2705         }
2706
2707         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2708                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2709                              ctdb_update_tcp_tickles, ctdb);
2710 }               
2711         
2712
2713 /*
2714   start periodic update of tcp tickles
2715  */
2716 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2717 {
2718         ctdb->tickle_update_context = talloc_new(ctdb);
2719
2720         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2721                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2722                              ctdb_update_tcp_tickles, ctdb);
2723 }
2724
2725
2726
2727
2728 struct control_gratious_arp {
2729         struct ctdb_context *ctdb;
2730         ctdb_sock_addr addr;
2731         const char *iface;
2732         int count;
2733 };
2734
2735 /*
2736   send a control_gratuitous arp
2737  */
2738 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2739                                   struct timeval t, void *private_data)
2740 {
2741         int ret;
2742         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2743                                                         struct control_gratious_arp);
2744
2745         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2746         if (ret != 0) {
2747                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2748                                  arp->iface, strerror(errno)));
2749         }
2750
2751
2752         arp->count++;
2753         if (arp->count == CTDB_ARP_REPEAT) {
2754                 talloc_free(arp);
2755                 return;
2756         }
2757
2758         event_add_timed(arp->ctdb->ev, arp, 
2759                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2760                         send_gratious_arp, arp);
2761 }
2762
2763
2764 /*
2765   send a gratious arp 
2766  */
2767 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2768 {
2769         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2770         struct control_gratious_arp *arp;
2771
2772         /* verify the size of indata */
2773         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2774                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2775                                  (unsigned)indata.dsize, 
2776                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2777                 return -1;
2778         }
2779         if (indata.dsize != 
2780                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2781                 + gratious_arp->len ) ){
2782
2783                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2784                         "but should be %u bytes\n", 
2785                          (unsigned)indata.dsize, 
2786                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2787                 return -1;
2788         }
2789
2790
2791         arp = talloc(ctdb, struct control_gratious_arp);
2792         CTDB_NO_MEMORY(ctdb, arp);
2793
2794         arp->ctdb  = ctdb;
2795         arp->addr   = gratious_arp->addr;
2796         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2797         CTDB_NO_MEMORY(ctdb, arp->iface);
2798         arp->count = 0;
2799         
2800         event_add_timed(arp->ctdb->ev, arp, 
2801                         timeval_zero(), send_gratious_arp, arp);
2802
2803         return 0;
2804 }
2805
2806 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2807 {
2808         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2809         int ret;
2810
2811         /* verify the size of indata */
2812         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2813                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2814                 return -1;
2815         }
2816         if (indata.dsize != 
2817                 ( offsetof(struct ctdb_control_ip_iface, iface)
2818                 + pub->len ) ){
2819
2820                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2821                         "but should be %u bytes\n", 
2822                          (unsigned)indata.dsize, 
2823                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2824                 return -1;
2825         }
2826
2827         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2828
2829         if (ret != 0) {
2830                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2831                 return -1;
2832         }
2833
2834         return 0;
2835 }
2836
2837 /*
2838   called when releaseip event finishes for del_public_address
2839  */
2840 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2841                                 void *private_data)
2842 {
2843         talloc_free(private_data);
2844 }
2845
2846 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2847 {
2848         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2849         struct ctdb_vnn *vnn;
2850         int ret;
2851
2852         /* verify the size of indata */
2853         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2854                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2855                 return -1;
2856         }
2857         if (indata.dsize != 
2858                 ( offsetof(struct ctdb_control_ip_iface, iface)
2859                 + pub->len ) ){
2860
2861                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2862                         "but should be %u bytes\n", 
2863                          (unsigned)indata.dsize, 
2864                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2865                 return -1;
2866         }
2867
2868         /* walk over all public addresses until we find a match */
2869         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2870                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2871                         TALLOC_CTX *mem_ctx;
2872
2873                         DLIST_REMOVE(ctdb->vnn, vnn);
2874                         if (vnn->iface == NULL) {
2875                                 talloc_free(vnn);
2876                                 return 0;
2877                         }
2878
2879                         mem_ctx = talloc_new(ctdb);
2880                         ret = ctdb_event_script_callback(ctdb, 
2881                                          mem_ctx, delete_ip_callback, mem_ctx,
2882                                          false,
2883                                          CTDB_EVENT_RELEASE_IP,
2884                                          "%s %s %u",
2885                                          ctdb_vnn_iface_string(vnn),
2886                                          ctdb_addr_to_str(&vnn->public_address),
2887                                          vnn->public_netmask_bits);
2888                         ctdb_vnn_unassign_iface(ctdb, vnn);
2889                         talloc_free(vnn);
2890                         if (ret != 0) {
2891                                 return -1;
2892                         }
2893                         return 0;
2894                 }
2895         }
2896
2897         return -1;
2898 }
2899
2900 /* This function is called from the recovery daemon to verify that a remote
2901    node has the expected ip allocation.
2902    This is verified against ctdb->ip_tree
2903 */
2904 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2905 {
2906         struct ctdb_public_ip_list *tmp_ip; 
2907         int i;
2908
2909         if (ctdb->ip_tree == NULL) {
2910                 /* dont know the expected allocation yet, assume remote node
2911                    is correct. */
2912                 return 0;
2913         }
2914
2915         if (ips == NULL) {
2916                 return 0;
2917         }
2918
2919         for (i=0; i<ips->num; i++) {
2920                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2921                 if (tmp_ip == NULL) {
2922                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2923                         return -1;
2924                 }
2925
2926                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2927                         continue;
2928                 }
2929
2930                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2931                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2932                         return -1;
2933                 }
2934         }
2935
2936         return 0;
2937 }
2938
2939 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2940 {
2941         struct ctdb_public_ip_list *tmp_ip; 
2942
2943         if (ctdb->ip_tree == NULL) {
2944                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2945                 return -1;
2946         }
2947
2948         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2949         if (tmp_ip == NULL) {
2950                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2951                 return -1;
2952         }
2953
2954         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2955         tmp_ip->pnn = ip->pnn;
2956
2957         return 0;
2958 }