Add a new tunable : DisableIPFailover that when set to non 0
[ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/*
  Timeout value built from the takeover_timeout tunable.
  Relies on a variable named 'ctdb' being in scope at the point of use.
 */
#define TAKEOVER_TIMEOUT() \
        timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* first argument handed to timeval_current_ofs() between gratuitous ARP rounds */
#define CTDB_ARP_INTERVAL 1
/* an ARP announcement stops rescheduling itself once it has run this many times */
#define CTDB_ARP_REPEAT   3
35
/*
  a local network interface that public addresses can be hosted on
 */
struct ctdb_iface {
        /* doubly linked list linkage (list head is ctdb->ifaces) */
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name, matched with strcmp() on lookup */
        const char *name;
        /* interfaces whose link is down are never picked for an address */
        bool link_up;
        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
        /* the control request to reply to once the event script is done */
        struct ctdb_req_control *c;
        /* the public address being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334         TDB_DATA data;
335
336         if (status != 0) {
337                 if (status == -ETIME) {
338                         ctdb_ban_self(ctdb);
339                 }
340                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341                                  ctdb_addr_to_str(&state->vnn->public_address),
342                                  ctdb_vnn_iface_string(state->vnn)));
343                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
344                 talloc_free(state);
345                 return;
346         }
347
348         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
349         if (ret != 0) {
350                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
351                 talloc_free(state);
352                 return;
353         }
354
355         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356         data.dsize = strlen((char *)data.dptr) + 1;
357         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
358
359         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
360
361
362         /* the control succeeded */
363         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
364         talloc_free(state);
365         return;
366 }
367
368 /*
369   take over an ip address
370  */
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372                               struct ctdb_req_control *c,
373                               struct ctdb_vnn *vnn)
374 {
375         int ret;
376         struct ctdb_do_takeip_state *state;
377
378         ret = ctdb_vnn_assign_iface(ctdb, vnn);
379         if (ret != 0) {
380                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381                                  "assin a usable interface\n",
382                                  ctdb_addr_to_str(&vnn->public_address),
383                                  vnn->public_netmask_bits));
384                 return -1;
385         }
386
387         state = talloc(vnn, struct ctdb_do_takeip_state);
388         CTDB_NO_MEMORY(ctdb, state);
389
390         state->c = talloc_steal(ctdb, c);
391         state->vnn   = vnn;
392
393         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394                             ctdb_addr_to_str(&vnn->public_address),
395                             vnn->public_netmask_bits,
396                             ctdb_vnn_iface_string(vnn)));
397
398         ret = ctdb_event_script_callback(ctdb,
399                                          state,
400                                          ctdb_do_takeip_callback,
401                                          state,
402                                          false,
403                                          CTDB_EVENT_TAKE_IP,
404                                          "%s %s %u",
405                                          ctdb_vnn_iface_string(vnn),
406                                          ctdb_addr_to_str(&vnn->public_address),
407                                          vnn->public_netmask_bits);
408
409         if (ret != 0) {
410                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411                         ctdb_addr_to_str(&vnn->public_address),
412                         ctdb_vnn_iface_string(vnn)));
413                 talloc_free(state);
414                 return -1;
415         }
416
417         return 0;
418 }
419
/*
  state handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
        /* the control request to reply to once the event script is done */
        struct ctdb_req_control *c;
        /* interface the address was on before the move; restored on failure */
        struct ctdb_iface *old;
        /* the public address being moved */
        struct ctdb_vnn *vnn;
};
425
426 /*
427   called when updateip event finishes
428  */
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
430                                       void *private_data)
431 {
432         struct ctdb_do_updateip_state *state =
433                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
434         int32_t ret;
435
436         if (status != 0) {
437                 if (status == -ETIME) {
438                         ctdb_ban_self(ctdb);
439                 }
440                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441                         ctdb_addr_to_str(&state->vnn->public_address),
442                         state->old->name,
443                         ctdb_vnn_iface_string(state->vnn)));
444
445                 /*
446                  * All we can do is reset the old interface
447                  * and let the next run fix it
448                  */
449                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450                 state->vnn->iface = state->old;
451                 state->vnn->iface->references++;
452
453                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
454                 talloc_free(state);
455                 return;
456         }
457
458         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
459         if (ret != 0) {
460                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
461                 talloc_free(state);
462                 return;
463         }
464
465         /* the control succeeded */
466         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
467         talloc_free(state);
468         return;
469 }
470
471 /*
472   update (move) an ip address
473  */
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475                                 struct ctdb_req_control *c,
476                                 struct ctdb_vnn *vnn)
477 {
478         int ret;
479         struct ctdb_do_updateip_state *state;
480         struct ctdb_iface *old = vnn->iface;
481
482         ctdb_vnn_unassign_iface(ctdb, vnn);
483         ret = ctdb_vnn_assign_iface(ctdb, vnn);
484         if (ret != 0) {
485                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486                                  "assin a usable interface (old iface '%s')\n",
487                                  ctdb_addr_to_str(&vnn->public_address),
488                                  vnn->public_netmask_bits,
489                                  old->name));
490                 return -1;
491         }
492
493         if (vnn->iface == old) {
494                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
495                                  "assin a same interface '%s'\n",
496                                  ctdb_addr_to_str(&vnn->public_address),
497                                  vnn->public_netmask_bits,
498                                  old->name));
499                 return -1;
500         }
501
502         state = talloc(vnn, struct ctdb_do_updateip_state);
503         CTDB_NO_MEMORY(ctdb, state);
504
505         state->c = talloc_steal(ctdb, c);
506         state->old = old;
507         state->vnn = vnn;
508
509         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
510                             "interface %s to %s\n",
511                             ctdb_addr_to_str(&vnn->public_address),
512                             vnn->public_netmask_bits,
513                             old->name,
514                             ctdb_vnn_iface_string(vnn)));
515
516         ret = ctdb_event_script_callback(ctdb,
517                                          state,
518                                          ctdb_do_updateip_callback,
519                                          state,
520                                          false,
521                                          CTDB_EVENT_UPDATE_IP,
522                                          "%s %s %s %u",
523                                          state->old->name,
524                                          ctdb_vnn_iface_string(vnn),
525                                          ctdb_addr_to_str(&vnn->public_address),
526                                          vnn->public_netmask_bits);
527         if (ret != 0) {
528                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
529                                  ctdb_addr_to_str(&vnn->public_address),
530                                  old->name, ctdb_vnn_iface_string(vnn)));
531                 talloc_free(state);
532                 return -1;
533         }
534
535         return 0;
536 }
537
538 /*
539   Find the vnn of the node that has a public ip address
540   returns -1 if the address is not known as a public address
541  */
542 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
543 {
544         struct ctdb_vnn *vnn;
545
546         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
547                 if (ctdb_same_ip(&vnn->public_address, addr)) {
548                         return vnn;
549                 }
550         }
551
552         return NULL;
553 }
554
555 /*
556   take over an ip address
557  */
558 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
559                                  struct ctdb_req_control *c,
560                                  TDB_DATA indata,
561                                  bool *async_reply)
562 {
563         int ret;
564         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
565         struct ctdb_vnn *vnn;
566         bool have_ip = false;
567         bool do_updateip = false;
568         bool do_takeip = false;
569         struct ctdb_iface *best_iface = NULL;
570
571         if (pip->pnn != ctdb->pnn) {
572                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
573                                  "with pnn %d, but we're node %d\n",
574                                  ctdb_addr_to_str(&pip->addr),
575                                  pip->pnn, ctdb->pnn));
576                 return -1;
577         }
578
579         /* update out vnn list */
580         vnn = find_public_ip_vnn(ctdb, &pip->addr);
581         if (vnn == NULL) {
582                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
583                         ctdb_addr_to_str(&pip->addr)));
584                 return 0;
585         }
586
587         have_ip = ctdb_sys_have_ip(&pip->addr);
588         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
589         if (best_iface == NULL) {
590                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
591                                  "a usable interface (old %s, have_ip %d)\n",
592                                  ctdb_addr_to_str(&vnn->public_address),
593                                  vnn->public_netmask_bits,
594                                  ctdb_vnn_iface_string(vnn),
595                                  have_ip));
596                 return -1;
597         }
598
599         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
600                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
601                 have_ip = false;
602         }
603
604         if (vnn->iface == NULL && have_ip) {
605                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606                                   "but we have no interface assigned, has someone manually configured it?"
607                                   "banning ourself\n",
608                                  ctdb_addr_to_str(&vnn->public_address)));
609                 ctdb_ban_self(ctdb);
610                 return -1;
611         }
612
613         if (vnn->pnn != ctdb->pnn && have_ip) {
614                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
615                                   "and we have it on iface[%s], but it was assigned to node %d"
616                                   "and we are node %d, banning ourself\n",
617                                  ctdb_addr_to_str(&vnn->public_address),
618                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
619                 ctdb_ban_self(ctdb);
620                 return -1;
621         }
622
623         if (vnn->iface) {
624                 if (vnn->iface->link_up) {
625                         /* only move when the rebalance gains something */
626                         if (vnn->iface->references > (best_iface->references + 1)) {
627                                 do_updateip = true;
628                         }
629                 } else if (vnn->iface != best_iface) {
630                         do_updateip = true;
631                 }
632         }
633
634         if (!have_ip) {
635                 if (do_updateip) {
636                         ctdb_vnn_unassign_iface(ctdb, vnn);
637                         do_updateip = false;
638                 }
639                 do_takeip = true;
640         }
641
642         if (do_takeip) {
643                 ret = ctdb_do_takeip(ctdb, c, vnn);
644                 if (ret != 0) {
645                         return -1;
646                 }
647         } else if (do_updateip) {
648                 ret = ctdb_do_updateip(ctdb, c, vnn);
649                 if (ret != 0) {
650                         return -1;
651                 }
652         } else {
653                 /*
654                  * The interface is up and the kernel known the ip
655                  * => do nothing
656                  */
657                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
658                         ctdb_addr_to_str(&pip->addr),
659                         vnn->public_netmask_bits,
660                         ctdb_vnn_iface_string(vnn)));
661                 return 0;
662         }
663
664         /* tell ctdb_control.c that we will be replying asynchronously */
665         *async_reply = true;
666
667         return 0;
668 }
669
670 /*
671   takeover an ip address old v4 style
672  */
673 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
674                                 struct ctdb_req_control *c,
675                                 TDB_DATA indata, 
676                                 bool *async_reply)
677 {
678         TDB_DATA data;
679         
680         data.dsize = sizeof(struct ctdb_public_ip);
681         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
682         CTDB_NO_MEMORY(ctdb, data.dptr);
683         
684         memcpy(data.dptr, indata.dptr, indata.dsize);
685         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
686 }
687
688 /*
689   kill any clients that are registered with a IP that is being released
690  */
691 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
692 {
693         struct ctdb_client_ip *ip;
694
695         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
696                 ctdb_addr_to_str(addr)));
697
698         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
699                 ctdb_sock_addr tmp_addr;
700
701                 tmp_addr = ip->addr;
702                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
703                         ip->client_id,
704                         ctdb_addr_to_str(&ip->addr)));
705
706                 if (ctdb_same_ip(&tmp_addr, addr)) {
707                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
708                                                                      ip->client_id, 
709                                                                      struct ctdb_client);
710                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
711                                 ip->client_id,
712                                 ctdb_addr_to_str(&ip->addr),
713                                 client->pid));
714
715                         if (client->pid != 0) {
716                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
717                                         (unsigned)client->pid,
718                                         ctdb_addr_to_str(addr),
719                                         ip->client_id));
720                                 kill(client->pid, SIGKILL);
721                         }
722                 }
723         }
724 }
725
726 /*
727   called when releaseip event finishes
728  */
729 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
730                                 void *private_data)
731 {
732         struct takeover_callback_state *state = 
733                 talloc_get_type(private_data, struct takeover_callback_state);
734         TDB_DATA data;
735
736         if (status == -ETIME) {
737                 ctdb_ban_self(ctdb);
738         }
739
740         /* send a message to all clients of this node telling them
741            that the cluster has been reconfigured and they should
742            release any sockets on this IP */
743         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
744         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
745         data.dsize = strlen((char *)data.dptr)+1;
746
747         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
748
749         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
750
751         /* kill clients that have registered with this IP */
752         release_kill_clients(ctdb, state->addr);
753
754         ctdb_vnn_unassign_iface(ctdb, state->vnn);
755
756         /* the control succeeded */
757         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
758         talloc_free(state);
759 }
760
761 /*
762   release an ip address
763  */
764 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
765                                 struct ctdb_req_control *c,
766                                 TDB_DATA indata, 
767                                 bool *async_reply)
768 {
769         int ret;
770         struct takeover_callback_state *state;
771         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
772         struct ctdb_vnn *vnn;
773
774         /* update our vnn list */
775         vnn = find_public_ip_vnn(ctdb, &pip->addr);
776         if (vnn == NULL) {
777                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
778                         ctdb_addr_to_str(&pip->addr)));
779                 return 0;
780         }
781         vnn->pnn = pip->pnn;
782
783         /* stop any previous arps */
784         talloc_free(vnn->takeover_ctx);
785         vnn->takeover_ctx = NULL;
786
787         if (!ctdb_sys_have_ip(&pip->addr)) {
788                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
789                         ctdb_addr_to_str(&pip->addr),
790                         vnn->public_netmask_bits, 
791                         ctdb_vnn_iface_string(vnn)));
792                 ctdb_vnn_unassign_iface(ctdb, vnn);
793                 return 0;
794         }
795
796         if (vnn->iface == NULL) {
797                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
798                                   "but we have no interface assigned, has someone manually configured it?"
799                                   "banning ourself\n",
800                                  ctdb_addr_to_str(&vnn->public_address)));
801                 ctdb_ban_self(ctdb);
802                 return -1;
803         }
804
805         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
806                 ctdb_addr_to_str(&pip->addr),
807                 vnn->public_netmask_bits, 
808                 ctdb_vnn_iface_string(vnn),
809                 pip->pnn));
810
811         state = talloc(ctdb, struct takeover_callback_state);
812         CTDB_NO_MEMORY(ctdb, state);
813
814         state->c = talloc_steal(state, c);
815         state->addr = talloc(state, ctdb_sock_addr);       
816         CTDB_NO_MEMORY(ctdb, state->addr);
817         *state->addr = pip->addr;
818         state->vnn   = vnn;
819
820         ret = ctdb_event_script_callback(ctdb, 
821                                          state, release_ip_callback, state,
822                                          false,
823                                          CTDB_EVENT_RELEASE_IP,
824                                          "%s %s %u",
825                                          ctdb_vnn_iface_string(vnn),
826                                          ctdb_addr_to_str(&pip->addr),
827                                          vnn->public_netmask_bits);
828         if (ret != 0) {
829                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
830                         ctdb_addr_to_str(&pip->addr),
831                         ctdb_vnn_iface_string(vnn)));
832                 talloc_free(state);
833                 return -1;
834         }
835
836         /* tell the control that we will be reply asynchronously */
837         *async_reply = true;
838         return 0;
839 }
840
841 /*
842   release an ip address old v4 style
843  */
844 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
845                                 struct ctdb_req_control *c,
846                                 TDB_DATA indata, 
847                                 bool *async_reply)
848 {
849         TDB_DATA data;
850         
851         data.dsize = sizeof(struct ctdb_public_ip);
852         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
853         CTDB_NO_MEMORY(ctdb, data.dptr);
854         
855         memcpy(data.dptr, indata.dptr, indata.dsize);
856         return ctdb_control_release_ip(ctdb, c, data, async_reply);
857 }
858
859
860 static int ctdb_add_public_address(struct ctdb_context *ctdb,
861                                    ctdb_sock_addr *addr,
862                                    unsigned mask, const char *ifaces)
863 {
864         struct ctdb_vnn      *vnn;
865         uint32_t num = 0;
866         char *tmp;
867         const char *iface;
868         int i;
869         int ret;
870
871         /* Verify that we dont have an entry for this ip yet */
872         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
873                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
874                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
875                                 ctdb_addr_to_str(addr)));
876                         return -1;
877                 }               
878         }
879
880         /* create a new vnn structure for this ip address */
881         vnn = talloc_zero(ctdb, struct ctdb_vnn);
882         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
883         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
884         tmp = talloc_strdup(vnn, ifaces);
885         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
886         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
887                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
888                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
889                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
890                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
891                 num++;
892         }
893         talloc_free(tmp);
894         vnn->ifaces[num] = NULL;
895         vnn->public_address      = *addr;
896         vnn->public_netmask_bits = mask;
897         vnn->pnn                 = -1;
898
899         for (i=0; vnn->ifaces[i]; i++) {
900                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
901                 if (ret != 0) {
902                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
903                                            "for public_address[%s]\n",
904                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
905                         talloc_free(vnn);
906                         return -1;
907                 }
908                 if (i == 0) {
909                         vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
910                 }
911         }
912
913         DLIST_ADD(ctdb->vnn, vnn);
914
915         return 0;
916 }
917
918 /*
919   setup the event script directory
920 */
921 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
922 {
923         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
924         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
925         return 0;
926 }
927
928 /*
929   setup the public address lists from a file
930 */
931 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
932 {
933         char **lines;
934         int nlines;
935         int i;
936
937         lines = file_lines_load(alist, &nlines, ctdb);
938         if (lines == NULL) {
939                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
940                 return -1;
941         }
942         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
943                 nlines--;
944         }
945
946         for (i=0;i<nlines;i++) {
947                 unsigned mask;
948                 ctdb_sock_addr addr;
949                 const char *addrstr;
950                 const char *ifaces;
951                 char *tok, *line;
952
953                 line = lines[i];
954                 while ((*line == ' ') || (*line == '\t')) {
955                         line++;
956                 }
957                 if (*line == '#') {
958                         continue;
959                 }
960                 if (strcmp(line, "") == 0) {
961                         continue;
962                 }
963                 tok = strtok(line, " \t");
964                 addrstr = tok;
965                 tok = strtok(NULL, " \t");
966                 if (tok == NULL) {
967                         if (NULL == ctdb->default_public_interface) {
968                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
969                                          i+1));
970                                 talloc_free(lines);
971                                 return -1;
972                         }
973                         ifaces = ctdb->default_public_interface;
974                 } else {
975                         ifaces = tok;
976                 }
977
978                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
979                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
980                         talloc_free(lines);
981                         return -1;
982                 }
983                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
984                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
985                         talloc_free(lines);
986                         return -1;
987                 }
988         }
989
990         talloc_free(lines);
991         return 0;
992 }
993
994 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
995                               const char *iface,
996                               const char *ip)
997 {
998         struct ctdb_vnn *svnn;
999         bool ok;
1000         int ret;
1001
1002         svnn = talloc_zero(ctdb, struct ctdb_vnn);
1003         CTDB_NO_MEMORY(ctdb, svnn);
1004
1005         svnn->ifaces = talloc_array(svnn, const char *, 2);
1006         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1007         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1008         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1009         svnn->ifaces[1] = NULL;
1010
1011         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1012         if (!ok) {
1013                 talloc_free(svnn);
1014                 return -1;
1015         }
1016
1017         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1018         if (ret != 0) {
1019                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1020                                    "for single_ip[%s]\n",
1021                                    svnn->ifaces[0],
1022                                    ctdb_addr_to_str(&svnn->public_address)));
1023                 talloc_free(svnn);
1024                 return -1;
1025         }
1026
1027         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1028         if (ret != 0) {
1029                 talloc_free(svnn);
1030                 return -1;
1031         }
1032
1033         ctdb->single_ip_vnn = svnn;
1034         return 0;
1035 }
1036
1037 struct ctdb_public_ip_list {
1038         struct ctdb_public_ip_list *next;
1039         uint32_t pnn;
1040         ctdb_sock_addr addr;
1041 };
1042
1043
1044 /* Given a physical node, return the number of
1045    public addresses that is currently assigned to this node.
1046 */
1047 static int node_ip_coverage(struct ctdb_context *ctdb, 
1048         int32_t pnn,
1049         struct ctdb_public_ip_list *ips)
1050 {
1051         int num=0;
1052
1053         for (;ips;ips=ips->next) {
1054                 if (ips->pnn == pnn) {
1055                         num++;
1056                 }
1057         }
1058         return num;
1059 }
1060
1061
1062 /* Check if this is a public ip known to the node, i.e. can that
1063    node takeover this ip ?
1064 */
1065 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1066                 struct ctdb_public_ip_list *ip)
1067 {
1068         struct ctdb_all_public_ips *public_ips;
1069         int i;
1070
1071         public_ips = ctdb->nodes[pnn]->available_public_ips;
1072
1073         if (public_ips == NULL) {
1074                 return -1;
1075         }
1076
1077         for (i=0;i<public_ips->num;i++) {
1078                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1079                         /* yes, this node can serve this public ip */
1080                         return 0;
1081                 }
1082         }
1083
1084         return -1;
1085 }
1086
1087
1088 /* search the node lists list for a node to takeover this ip.
1089    pick the node that currently are serving the least number of ips
1090    so that the ips get spread out evenly.
1091 */
1092 static int find_takeover_node(struct ctdb_context *ctdb, 
1093                 struct ctdb_node_map *nodemap, uint32_t mask, 
1094                 struct ctdb_public_ip_list *ip,
1095                 struct ctdb_public_ip_list *all_ips)
1096 {
1097         int pnn, min=0, num;
1098         int i;
1099
1100         pnn    = -1;
1101         for (i=0;i<nodemap->num;i++) {
1102                 if (nodemap->nodes[i].flags & mask) {
1103                         /* This node is not healty and can not be used to serve
1104                            a public address 
1105                         */
1106                         continue;
1107                 }
1108
1109                 /* verify that this node can serve this ip */
1110                 if (can_node_serve_ip(ctdb, i, ip)) {
1111                         /* no it couldnt   so skip to the next node */
1112                         continue;
1113                 }
1114
1115                 num = node_ip_coverage(ctdb, i, all_ips);
1116                 /* was this the first node we checked ? */
1117                 if (pnn == -1) {
1118                         pnn = i;
1119                         min  = num;
1120                 } else {
1121                         if (num < min) {
1122                                 pnn = i;
1123                                 min  = num;
1124                         }
1125                 }
1126         }       
1127         if (pnn == -1) {
1128                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1129                         ctdb_addr_to_str(&ip->addr)));
1130
1131                 return -1;
1132         }
1133
1134         ip->pnn = pnn;
1135         return 0;
1136 }
1137
1138 #define IP_KEYLEN       4
1139 static uint32_t *ip_key(ctdb_sock_addr *ip)
1140 {
1141         static uint32_t key[IP_KEYLEN];
1142
1143         bzero(key, sizeof(key));
1144
1145         switch (ip->sa.sa_family) {
1146         case AF_INET:
1147                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1148                 break;
1149         case AF_INET6:
1150                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1151                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1152                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1153                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1154                 break;
1155         default:
1156                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1157                 return key;
1158         }
1159
1160         return key;
1161 }
1162
/* callback passed to trbt_insertarray32_callback() when the merged ip
   list is built: hands parm back unchanged and does not look at data
*/
static void *add_ip_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
1167
1168 void getips_count_callback(void *param, void *data)
1169 {
1170         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1171         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1172
1173         new_ip->next = *ip_list;
1174         *ip_list     = new_ip;
1175 }
1176
1177 static struct ctdb_public_ip_list *
1178 create_merged_ip_list(struct ctdb_context *ctdb)
1179 {
1180         int i, j;
1181         struct ctdb_public_ip_list *ip_list;
1182         struct ctdb_all_public_ips *public_ips;
1183
1184         if (ctdb->ip_tree != NULL) {
1185                 talloc_free(ctdb->ip_tree);
1186                 ctdb->ip_tree = NULL;
1187         }
1188         ctdb->ip_tree = trbt_create(ctdb, 0);
1189
1190         for (i=0;i<ctdb->num_nodes;i++) {
1191                 public_ips = ctdb->nodes[i]->known_public_ips;
1192
1193                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1194                         continue;
1195                 }
1196
1197                 /* there were no public ips for this node */
1198                 if (public_ips == NULL) {
1199                         continue;
1200                 }               
1201
1202                 for (j=0;j<public_ips->num;j++) {
1203                         struct ctdb_public_ip_list *tmp_ip; 
1204
1205                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1206                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1207                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1208                         tmp_ip->addr = public_ips->ips[j].addr;
1209                         tmp_ip->next = NULL;
1210
1211                         trbt_insertarray32_callback(ctdb->ip_tree,
1212                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1213                                 add_ip_callback,
1214                                 tmp_ip);
1215                 }
1216         }
1217
1218         ip_list = NULL;
1219         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1220
1221         return ip_list;
1222 }
1223
1224 /*
1225   make any IP alias changes for public addresses that are necessary 
1226  */
1227 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1228 {
1229         int i, num_healthy, retries;
1230         struct ctdb_public_ip ip;
1231         struct ctdb_public_ipv4 ipv4;
1232         uint32_t mask, *nodes;
1233         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1234         int maxnode, maxnum=0, minnode, minnum=0, num;
1235         TDB_DATA data;
1236         struct timeval timeout;
1237         struct client_async_data *async_data;
1238         struct ctdb_client_control_state *state;
1239         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1240
1241         /*
1242          * ip failover is completely disabled, just send out the 
1243          * ipreallocated event.
1244          */
1245         if (ctdb->tunable.disable_ip_failover != 0) {
1246                 goto ipreallocated;
1247         }
1248
1249         ZERO_STRUCT(ip);
1250
1251         /* Count how many completely healthy nodes we have */
1252         num_healthy = 0;
1253         for (i=0;i<nodemap->num;i++) {
1254                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1255                         num_healthy++;
1256                 }
1257         }
1258
1259         if (num_healthy > 0) {
1260                 /* We have healthy nodes, so only consider them for 
1261                    serving public addresses
1262                 */
1263                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1264         } else {
1265                 /* We didnt have any completely healthy nodes so
1266                    use "disabled" nodes as a fallback
1267                 */
1268                 mask = NODE_FLAGS_INACTIVE;
1269         }
1270
1271         /* since nodes only know about those public addresses that
1272            can be served by that particular node, no single node has
1273            a full list of all public addresses that exist in the cluster.
1274            Walk over all node structures and create a merged list of
1275            all public addresses that exist in the cluster.
1276
1277            keep the tree of ips around as ctdb->ip_tree
1278         */
1279         all_ips = create_merged_ip_list(ctdb);
1280
1281         /* If we want deterministic ip allocations, i.e. that the ip addresses
1282            will always be allocated the same way for a specific set of
1283            available/unavailable nodes.
1284         */
1285         if (1 == ctdb->tunable.deterministic_public_ips) {              
1286                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1287                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1288                         tmp_ip->pnn = i%nodemap->num;
1289                 }
1290         }
1291
1292
1293         /* mark all public addresses with a masked node as being served by
1294            node -1
1295         */
1296         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1297                 if (tmp_ip->pnn == -1) {
1298                         continue;
1299                 }
1300                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1301                         tmp_ip->pnn = -1;
1302                 }
1303         }
1304
1305         /* verify that the assigned nodes can serve that public ip
1306            and set it to -1 if not
1307         */
1308         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1309                 if (tmp_ip->pnn == -1) {
1310                         continue;
1311                 }
1312                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1313                         /* this node can not serve this ip. */
1314                         tmp_ip->pnn = -1;
1315                 }
1316         }
1317
1318
1319         /* now we must redistribute all public addresses with takeover node
1320            -1 among the nodes available
1321         */
1322         retries = 0;
1323 try_again:
1324         /* loop over all ip's and find a physical node to cover for 
1325            each unassigned ip.
1326         */
1327         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1328                 if (tmp_ip->pnn == -1) {
1329                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1330                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1331                                         ctdb_addr_to_str(&tmp_ip->addr)));
1332                         }
1333                 }
1334         }
1335
1336         /* If we dont want ips to fail back after a node becomes healthy
1337            again, we wont even try to reallocat the ip addresses so that
1338            they are evenly spread out.
1339            This can NOT be used at the same time as DeterministicIPs !
1340         */
1341         if (1 == ctdb->tunable.no_ip_failback) {
1342                 if (1 == ctdb->tunable.deterministic_public_ips) {
1343                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1344                 }
1345                 goto finished;
1346         }
1347
1348
1349         /* now, try to make sure the ip adresses are evenly distributed
1350            across the node.
1351            for each ip address, loop over all nodes that can serve this
1352            ip and make sure that the difference between the node
1353            serving the most and the node serving the least ip's are not greater
1354            than 1.
1355         */
1356         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1357                 if (tmp_ip->pnn == -1) {
1358                         continue;
1359                 }
1360
1361                 /* Get the highest and lowest number of ips's served by any 
1362                    valid node which can serve this ip.
1363                 */
1364                 maxnode = -1;
1365                 minnode = -1;
1366                 for (i=0;i<nodemap->num;i++) {
1367                         if (nodemap->nodes[i].flags & mask) {
1368                                 continue;
1369                         }
1370
1371                         /* only check nodes that can actually serve this ip */
1372                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1373                                 /* no it couldnt   so skip to the next node */
1374                                 continue;
1375                         }
1376
1377                         num = node_ip_coverage(ctdb, i, all_ips);
1378                         if (maxnode == -1) {
1379                                 maxnode = i;
1380                                 maxnum  = num;
1381                         } else {
1382                                 if (num > maxnum) {
1383                                         maxnode = i;
1384                                         maxnum  = num;
1385                                 }
1386                         }
1387                         if (minnode == -1) {
1388                                 minnode = i;
1389                                 minnum  = num;
1390                         } else {
1391                                 if (num < minnum) {
1392                                         minnode = i;
1393                                         minnum  = num;
1394                                 }
1395                         }
1396                 }
1397                 if (maxnode == -1) {
1398                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1399                                 ctdb_addr_to_str(&tmp_ip->addr)));
1400
1401                         continue;
1402                 }
1403
1404                 /* If we want deterministic IPs then dont try to reallocate 
1405                    them to spread out the load.
1406                 */
1407                 if (1 == ctdb->tunable.deterministic_public_ips) {
1408                         continue;
1409                 }
1410
1411                 /* if the spread between the smallest and largest coverage by
1412                    a node is >=2 we steal one of the ips from the node with
1413                    most coverage to even things out a bit.
1414                    try to do this at most 5 times  since we dont want to spend
1415                    too much time balancing the ip coverage.
1416                 */
1417                 if ( (maxnum > minnum+1)
1418                   && (retries < 5) ){
1419                         struct ctdb_public_ip_list *tmp;
1420
1421                         /* mark one of maxnode's vnn's as unassigned and try
1422                            again
1423                         */
1424                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1425                                 if (tmp->pnn == maxnode) {
1426                                         tmp->pnn = -1;
1427                                         retries++;
1428                                         goto try_again;
1429                                 }
1430                         }
1431                 }
1432         }
1433
1434
1435         /* finished distributing the public addresses, now just send the 
1436            info out to the nodes
1437         */
1438 finished:
1439
1440         /* at this point ->pnn is the node which will own each IP
1441            or -1 if there is no node that can cover this ip
1442         */
1443
1444         /* now tell all nodes to delete any alias that they should not
1445            have.  This will be a NOOP on nodes that don't currently
1446            hold the given alias */
1447         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1448         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1449
1450         for (i=0;i<nodemap->num;i++) {
1451                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1452                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1453                         continue;
1454                 }
1455
1456                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1457                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1458                                 /* This node should be serving this
1459                                    vnn so dont tell it to release the ip
1460                                 */
1461                                 continue;
1462                         }
1463                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1464                                 ipv4.pnn = tmp_ip->pnn;
1465                                 ipv4.sin = tmp_ip->addr.ip;
1466
1467                                 timeout = TAKEOVER_TIMEOUT();
1468                                 data.dsize = sizeof(ipv4);
1469                                 data.dptr  = (uint8_t *)&ipv4;
1470                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1471                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1472                                                 data, async_data,
1473                                                 &timeout, NULL);
1474                         } else {
1475                                 ip.pnn  = tmp_ip->pnn;
1476                                 ip.addr = tmp_ip->addr;
1477
1478                                 timeout = TAKEOVER_TIMEOUT();
1479                                 data.dsize = sizeof(ip);
1480                                 data.dptr  = (uint8_t *)&ip;
1481                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1482                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1483                                                 data, async_data,
1484                                                 &timeout, NULL);
1485                         }
1486
1487                         if (state == NULL) {
1488                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1489                                 talloc_free(tmp_ctx);
1490                                 return -1;
1491                         }
1492                 
1493                         ctdb_client_async_add(async_data, state);
1494                 }
1495         }
1496         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1497                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1498                 talloc_free(tmp_ctx);
1499                 return -1;
1500         }
1501         talloc_free(async_data);
1502
1503
1504         /* tell all nodes to get their own IPs */
1505         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1506         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1507         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1508                 if (tmp_ip->pnn == -1) {
1509                         /* this IP won't be taken over */
1510                         continue;
1511                 }
1512
1513                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1514                         ipv4.pnn = tmp_ip->pnn;
1515                         ipv4.sin = tmp_ip->addr.ip;
1516
1517                         timeout = TAKEOVER_TIMEOUT();
1518                         data.dsize = sizeof(ipv4);
1519                         data.dptr  = (uint8_t *)&ipv4;
1520                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1521                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1522                                         data, async_data,
1523                                         &timeout, NULL);
1524                 } else {
1525                         ip.pnn  = tmp_ip->pnn;
1526                         ip.addr = tmp_ip->addr;
1527
1528                         timeout = TAKEOVER_TIMEOUT();
1529                         data.dsize = sizeof(ip);
1530                         data.dptr  = (uint8_t *)&ip;
1531                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1532                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1533                                         data, async_data,
1534                                         &timeout, NULL);
1535                 }
1536                 if (state == NULL) {
1537                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1538                         talloc_free(tmp_ctx);
1539                         return -1;
1540                 }
1541                 
1542                 ctdb_client_async_add(async_data, state);
1543         }
1544         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1545                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1546                 talloc_free(tmp_ctx);
1547                 return -1;
1548         }
1549
1550 ipreallocated:
1551         /* tell all nodes to update natwg */
1552         /* send the flags update natgw on all connected nodes */
1553         data.dptr  = discard_const("ipreallocated");
1554         data.dsize = strlen((char *)data.dptr) + 1; 
1555         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1556         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1557                                       nodes, 0, TAKEOVER_TIMEOUT(),
1558                                       false, data,
1559                                       NULL, NULL,
1560                                       NULL) != 0) {
1561                 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1562         }
1563
1564         talloc_free(tmp_ctx);
1565         return 0;
1566 }
1567
1568
1569 /*
1570   destroy a ctdb_client_ip structure
1571  */
1572 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1573 {
1574         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1575                 ctdb_addr_to_str(&ip->addr),
1576                 ntohs(ip->addr.ip.sin_port),
1577                 ip->client_id));
1578
1579         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1580         return 0;
1581 }
1582
1583 /*
1584   called by a client to inform us of a TCP connection that it is managing
1585   that should tickled with an ACK when IP takeover is done
1586   we handle both the old ipv4 style of packets as well as the new ipv4/6
1587   pdus.
1588  */
1589 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1590                                 TDB_DATA indata)
1591 {
1592         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1593         struct ctdb_control_tcp *old_addr = NULL;
1594         struct ctdb_control_tcp_addr new_addr;
1595         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1596         struct ctdb_tcp_list *tcp;
1597         struct ctdb_tcp_connection t;
1598         int ret;
1599         TDB_DATA data;
1600         struct ctdb_client_ip *ip;
1601         struct ctdb_vnn *vnn;
1602         ctdb_sock_addr addr;
1603
1604         switch (indata.dsize) {
1605         case sizeof(struct ctdb_control_tcp):
1606                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1607                 ZERO_STRUCT(new_addr);
1608                 tcp_sock = &new_addr;
1609                 tcp_sock->src.ip  = old_addr->src;
1610                 tcp_sock->dest.ip = old_addr->dest;
1611                 break;
1612         case sizeof(struct ctdb_control_tcp_addr):
1613                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1614                 break;
1615         default:
1616                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1617                                  "to ctdb_control_tcp_client. size was %d but "
1618                                  "only allowed sizes are %lu and %lu\n",
1619                                  (int)indata.dsize,
1620                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1621                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1622                 return -1;
1623         }
1624
1625         addr = tcp_sock->src;
1626         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1627         addr = tcp_sock->dest;
1628         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1629
1630         ZERO_STRUCT(addr);
1631         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1632         vnn = find_public_ip_vnn(ctdb, &addr);
1633         if (vnn == NULL) {
1634                 switch (addr.sa.sa_family) {
1635                 case AF_INET:
1636                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1637                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1638                                         ctdb_addr_to_str(&addr)));
1639                         }
1640                         break;
1641                 case AF_INET6:
1642                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1643                                 ctdb_addr_to_str(&addr)));
1644                         break;
1645                 default:
1646                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1647                 }
1648
1649                 return 0;
1650         }
1651
1652         if (vnn->pnn != ctdb->pnn) {
1653                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1654                         ctdb_addr_to_str(&addr),
1655                         client_id, client->pid));
1656                 /* failing this call will tell smbd to die */
1657                 return -1;
1658         }
1659
1660         ip = talloc(client, struct ctdb_client_ip);
1661         CTDB_NO_MEMORY(ctdb, ip);
1662
1663         ip->ctdb      = ctdb;
1664         ip->addr      = addr;
1665         ip->client_id = client_id;
1666         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1667         DLIST_ADD(ctdb->client_ip_list, ip);
1668
1669         tcp = talloc(client, struct ctdb_tcp_list);
1670         CTDB_NO_MEMORY(ctdb, tcp);
1671
1672         tcp->connection.src_addr = tcp_sock->src;
1673         tcp->connection.dst_addr = tcp_sock->dest;
1674
1675         DLIST_ADD(client->tcp_list, tcp);
1676
1677         t.src_addr = tcp_sock->src;
1678         t.dst_addr = tcp_sock->dest;
1679
1680         data.dptr = (uint8_t *)&t;
1681         data.dsize = sizeof(t);
1682
1683         switch (addr.sa.sa_family) {
1684         case AF_INET:
1685                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1686                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1687                         ctdb_addr_to_str(&tcp_sock->src),
1688                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1689                 break;
1690         case AF_INET6:
1691                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1692                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1693                         ctdb_addr_to_str(&tcp_sock->src),
1694                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1695                 break;
1696         default:
1697                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1698         }
1699
1700
1701         /* tell all nodes about this tcp connection */
1702         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1703                                        CTDB_CONTROL_TCP_ADD,
1704                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1705         if (ret != 0) {
1706                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1707                 return -1;
1708         }
1709
1710         return 0;
1711 }
1712
1713 /*
1714   find a tcp address on a list
1715  */
1716 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1717                                            struct ctdb_tcp_connection *tcp)
1718 {
1719         int i;
1720
1721         if (array == NULL) {
1722                 return NULL;
1723         }
1724
1725         for (i=0;i<array->num;i++) {
1726                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1727                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1728                         return &array->connections[i];
1729                 }
1730         }
1731         return NULL;
1732 }
1733
1734
1735
1736 /*
1737   called by a daemon to inform us of a TCP connection that one of its
1738   clients managing that should tickled with an ACK when IP takeover is
1739   done
1740  */
1741 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1742 {
1743         struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1744         struct ctdb_tcp_array *tcparray;
1745         struct ctdb_tcp_connection tcp;
1746         struct ctdb_vnn *vnn;
1747
1748         vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1749         if (vnn == NULL) {
1750                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1751                         ctdb_addr_to_str(&p->dst_addr)));
1752
1753                 return -1;
1754         }
1755
1756
1757         tcparray = vnn->tcp_array;
1758
1759         /* If this is the first tickle */
1760         if (tcparray == NULL) {
1761                 tcparray = talloc_size(ctdb->nodes, 
1762                         offsetof(struct ctdb_tcp_array, connections) +
1763                         sizeof(struct ctdb_tcp_connection) * 1);
1764                 CTDB_NO_MEMORY(ctdb, tcparray);
1765                 vnn->tcp_array = tcparray;
1766
1767                 tcparray->num = 0;
1768                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1769                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1770
1771                 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1772                 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1773                 tcparray->num++;
1774
1775                 if (tcp_update_needed) {
1776                         vnn->tcp_update_needed = true;
1777                 }
1778                 return 0;
1779         }
1780
1781
1782         /* Do we already have this tickle ?*/
1783         tcp.src_addr = p->src_addr;
1784         tcp.dst_addr = p->dst_addr;
1785         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1786                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1787                         ctdb_addr_to_str(&tcp.dst_addr),
1788                         ntohs(tcp.dst_addr.ip.sin_port),
1789                         vnn->pnn));
1790                 return 0;
1791         }
1792
1793         /* A new tickle, we must add it to the array */
1794         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1795                                         struct ctdb_tcp_connection,
1796                                         tcparray->num+1);
1797         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1798
1799         vnn->tcp_array = tcparray;
1800         tcparray->connections[tcparray->num].src_addr = p->src_addr;
1801         tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1802         tcparray->num++;
1803                                 
1804         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1805                 ctdb_addr_to_str(&tcp.dst_addr),
1806                 ntohs(tcp.dst_addr.ip.sin_port),
1807                 vnn->pnn));
1808
1809         if (tcp_update_needed) {
1810                 vnn->tcp_update_needed = true;
1811         }
1812
1813         return 0;
1814 }
1815
1816
1817 /*
1818   called by a daemon to inform us of a TCP connection that one of its
1819   clients managing that should tickled with an ACK when IP takeover is
1820   done
1821  */
1822 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1823 {
1824         struct ctdb_tcp_connection *tcpp;
1825         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1826
1827         if (vnn == NULL) {
1828                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1829                         ctdb_addr_to_str(&conn->dst_addr)));
1830                 return;
1831         }
1832
1833         /* if the array is empty we cant remove it
1834            and we dont need to do anything
1835          */
1836         if (vnn->tcp_array == NULL) {
1837                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1838                         ctdb_addr_to_str(&conn->dst_addr),
1839                         ntohs(conn->dst_addr.ip.sin_port)));
1840                 return;
1841         }
1842
1843
1844         /* See if we know this connection
1845            if we dont know this connection  then we dont need to do anything
1846          */
1847         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1848         if (tcpp == NULL) {
1849                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1850                         ctdb_addr_to_str(&conn->dst_addr),
1851                         ntohs(conn->dst_addr.ip.sin_port)));
1852                 return;
1853         }
1854
1855
1856         /* We need to remove this entry from the array.
1857            Instead of allocating a new array and copying data to it
1858            we cheat and just copy the last entry in the existing array
1859            to the entry that is to be removed and just shring the 
1860            ->num field
1861          */
1862         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1863         vnn->tcp_array->num--;
1864
1865         /* If we deleted the last entry we also need to remove the entire array
1866          */
1867         if (vnn->tcp_array->num == 0) {
1868                 talloc_free(vnn->tcp_array);
1869                 vnn->tcp_array = NULL;
1870         }               
1871
1872         vnn->tcp_update_needed = true;
1873
1874         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1875                 ctdb_addr_to_str(&conn->src_addr),
1876                 ntohs(conn->src_addr.ip.sin_port)));
1877 }
1878
1879
1880 /*
1881   called by a daemon to inform us of a TCP connection that one of its
1882   clients used are no longer needed in the tickle database
1883  */
1884 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1885 {
1886         struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1887
1888         ctdb_remove_tcp_connection(ctdb, conn);
1889
1890         return 0;
1891 }
1892
1893
/*
  called when a daemon restarts. the intention is to send all tickles for
  all public addresses we are serving to the new node immediately.
  currently a stub: both arguments are ignored and 0 is returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        return 0;
}
1903
1904
1905 /*
1906   called when a client structure goes away - hook to remove
1907   elements from the tcp_list in all daemons
1908  */
1909 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1910 {
1911         while (client->tcp_list) {
1912                 struct ctdb_tcp_list *tcp = client->tcp_list;
1913                 DLIST_REMOVE(client->tcp_list, tcp);
1914                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1915         }
1916 }
1917
1918
1919 /*
1920   release all IPs on shutdown
1921  */
1922 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1923 {
1924         struct ctdb_vnn *vnn;
1925
1926         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1927                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1928                         ctdb_vnn_unassign_iface(ctdb, vnn);
1929                         continue;
1930                 }
1931                 if (!vnn->iface) {
1932                         continue;
1933                 }
1934                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1935                                   ctdb_vnn_iface_string(vnn),
1936                                   ctdb_addr_to_str(&vnn->public_address),
1937                                   vnn->public_netmask_bits);
1938                 release_kill_clients(ctdb, &vnn->public_address);
1939                 ctdb_vnn_unassign_iface(ctdb, vnn);
1940         }
1941 }
1942
1943
1944 /*
1945   get list of public IPs
1946  */
1947 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1948                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1949 {
1950         int i, num, len;
1951         struct ctdb_all_public_ips *ips;
1952         struct ctdb_vnn *vnn;
1953         bool only_available = false;
1954
1955         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1956                 only_available = true;
1957         }
1958
1959         /* count how many public ip structures we have */
1960         num = 0;
1961         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1962                 num++;
1963         }
1964
1965         len = offsetof(struct ctdb_all_public_ips, ips) + 
1966                 num*sizeof(struct ctdb_public_ip);
1967         ips = talloc_zero_size(outdata, len);
1968         CTDB_NO_MEMORY(ctdb, ips);
1969
1970         i = 0;
1971         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1972                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1973                         continue;
1974                 }
1975                 ips->ips[i].pnn  = vnn->pnn;
1976                 ips->ips[i].addr = vnn->public_address;
1977                 i++;
1978         }
1979         ips->num = i;
1980         len = offsetof(struct ctdb_all_public_ips, ips) +
1981                 i*sizeof(struct ctdb_public_ip);
1982
1983         outdata->dsize = len;
1984         outdata->dptr  = (uint8_t *)ips;
1985
1986         return 0;
1987 }
1988
1989
1990 /*
1991   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1992  */
1993 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1994                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1995 {
1996         int i, num, len;
1997         struct ctdb_all_public_ipsv4 *ips;
1998         struct ctdb_vnn *vnn;
1999
2000         /* count how many public ip structures we have */
2001         num = 0;
2002         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2003                 if (vnn->public_address.sa.sa_family != AF_INET) {
2004                         continue;
2005                 }
2006                 num++;
2007         }
2008
2009         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
2010                 num*sizeof(struct ctdb_public_ipv4);
2011         ips = talloc_zero_size(outdata, len);
2012         CTDB_NO_MEMORY(ctdb, ips);
2013
2014         outdata->dsize = len;
2015         outdata->dptr  = (uint8_t *)ips;
2016
2017         ips->num = num;
2018         i = 0;
2019         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2020                 if (vnn->public_address.sa.sa_family != AF_INET) {
2021                         continue;
2022                 }
2023                 ips->ips[i].pnn = vnn->pnn;
2024                 ips->ips[i].sin = vnn->public_address.ip;
2025                 i++;
2026         }
2027
2028         return 0;
2029 }
2030
2031 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2032                                         struct ctdb_req_control *c,
2033                                         TDB_DATA indata,
2034                                         TDB_DATA *outdata)
2035 {
2036         int i, num, len;
2037         ctdb_sock_addr *addr;
2038         struct ctdb_control_public_ip_info *info;
2039         struct ctdb_vnn *vnn;
2040
2041         addr = (ctdb_sock_addr *)indata.dptr;
2042
2043         vnn = find_public_ip_vnn(ctdb, addr);
2044         if (vnn == NULL) {
2045                 /* if it is not a public ip   it could be our 'single ip' */
2046                 if (ctdb->single_ip_vnn) {
2047                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2048                                 vnn = ctdb->single_ip_vnn;
2049                         }
2050                 }
2051         }
2052         if (vnn == NULL) {
2053                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2054                                  "'%s'not a public address\n",
2055                                  ctdb_addr_to_str(addr)));
2056                 return -1;
2057         }
2058
2059         /* count how many public ip structures we have */
2060         num = 0;
2061         for (;vnn->ifaces[num];) {
2062                 num++;
2063         }
2064
2065         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2066                 num*sizeof(struct ctdb_control_iface_info);
2067         info = talloc_zero_size(outdata, len);
2068         CTDB_NO_MEMORY(ctdb, info);
2069
2070         info->ip.addr = vnn->public_address;
2071         info->ip.pnn = vnn->pnn;
2072         info->active_idx = 0xFFFFFFFF;
2073
2074         for (i=0; vnn->ifaces[i]; i++) {
2075                 struct ctdb_iface *cur;
2076
2077                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2078                 if (cur == NULL) {
2079                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2080                                            vnn->ifaces[i]));
2081                         return -1;
2082                 }
2083                 if (vnn->iface == cur) {
2084                         info->active_idx = i;
2085                 }
2086                 strcpy(info->ifaces[i].name, cur->name);
2087                 info->ifaces[i].link_state = cur->link_up;
2088                 info->ifaces[i].references = cur->references;
2089         }
2090         info->num = i;
2091         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2092                 i*sizeof(struct ctdb_control_iface_info);
2093
2094         outdata->dsize = len;
2095         outdata->dptr  = (uint8_t *)info;
2096
2097         return 0;
2098 }
2099
2100 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2101                                 struct ctdb_req_control *c,
2102                                 TDB_DATA *outdata)
2103 {
2104         int i, num, len;
2105         struct ctdb_control_get_ifaces *ifaces;
2106         struct ctdb_iface *cur;
2107
2108         /* count how many public ip structures we have */
2109         num = 0;
2110         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2111                 num++;
2112         }
2113
2114         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2115                 num*sizeof(struct ctdb_control_iface_info);
2116         ifaces = talloc_zero_size(outdata, len);
2117         CTDB_NO_MEMORY(ctdb, ifaces);
2118
2119         i = 0;
2120         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2121                 strcpy(ifaces->ifaces[i].name, cur->name);
2122                 ifaces->ifaces[i].link_state = cur->link_up;
2123                 ifaces->ifaces[i].references = cur->references;
2124                 i++;
2125         }
2126         ifaces->num = i;
2127         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2128                 i*sizeof(struct ctdb_control_iface_info);
2129
2130         outdata->dsize = len;
2131         outdata->dptr  = (uint8_t *)ifaces;
2132
2133         return 0;
2134 }
2135
2136 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2137                                     struct ctdb_req_control *c,
2138                                     TDB_DATA indata)
2139 {
2140         struct ctdb_control_iface_info *info;
2141         struct ctdb_iface *iface;
2142         bool link_up = false;
2143
2144         info = (struct ctdb_control_iface_info *)indata.dptr;
2145
2146         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2147                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2148                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2149                                   len, len, info->name));
2150                 return -1;
2151         }
2152
2153         switch (info->link_state) {
2154         case 0:
2155                 link_up = false;
2156                 break;
2157         case 1:
2158                 link_up = true;
2159                 break;
2160         default:
2161                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2162                                   (unsigned int)info->link_state));
2163                 return -1;
2164         }
2165
2166         if (info->references != 0) {
2167                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2168                                   (unsigned int)info->references));
2169                 return -1;
2170         }
2171
2172         iface = ctdb_find_iface(ctdb, info->name);
2173         if (iface == NULL) {
2174                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2175                                   info->name));
2176                 return -1;
2177         }
2178
2179         if (link_up == iface->link_up) {
2180                 return 0;
2181         }
2182
2183         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2184               ("iface[%s] has changed it's link status %s => %s\n",
2185                iface->name,
2186                iface->link_up?"up":"down",
2187                link_up?"up":"down"));
2188
2189         iface->link_up = link_up;
2190         return 0;
2191 }
2192
2193
2194 /* 
2195    structure containing the listening socket and the list of tcp connections
2196    that the ctdb daemon is to kill
2197 */
2198 struct ctdb_kill_tcp {
2199         struct ctdb_vnn *vnn;
2200         struct ctdb_context *ctdb;
2201         int capture_fd;
2202         struct fd_event *fde;
2203         trbt_tree_t *connections;
2204         void *private_data;
2205 };
2206
2207 /*
2208   a tcp connection that is to be killed
2209  */
2210 struct ctdb_killtcp_con {
2211         ctdb_sock_addr src_addr;
2212         ctdb_sock_addr dst_addr;
2213         int count;
2214         struct ctdb_kill_tcp *killtcp;
2215 };
2216
2217 /* this function is used to create a key to represent this socketpair
2218    in the killtcp tree.
2219    this key is used to insert and lookup matching socketpairs that are
2220    to be tickled and RST
2221 */
2222 #define KILLTCP_KEYLEN  10
2223 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2224 {
2225         static uint32_t key[KILLTCP_KEYLEN];
2226
2227         bzero(key, sizeof(key));
2228
2229         if (src->sa.sa_family != dst->sa.sa_family) {
2230                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2231                 return key;
2232         }
2233         
2234         switch (src->sa.sa_family) {
2235         case AF_INET:
2236                 key[0]  = dst->ip.sin_addr.s_addr;
2237                 key[1]  = src->ip.sin_addr.s_addr;
2238                 key[2]  = dst->ip.sin_port;
2239                 key[3]  = src->ip.sin_port;
2240                 break;
2241         case AF_INET6:
2242                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2243                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2244                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2245                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2246                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2247                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2248                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2249                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2250                 key[8]  = dst->ip6.sin6_port;
2251                 key[9]  = src->ip6.sin6_port;
2252                 break;
2253         default:
2254                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2255                 return key;
2256         }
2257
2258         return key;
2259 }
2260
2261 /*
2262   called when we get a read event on the raw socket
2263  */
2264 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2265                                 uint16_t flags, void *private_data)
2266 {
2267         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2268         struct ctdb_killtcp_con *con;
2269         ctdb_sock_addr src, dst;
2270         uint32_t ack_seq, seq;
2271
2272         if (!(flags & EVENT_FD_READ)) {
2273                 return;
2274         }
2275
2276         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2277                                 killtcp->private_data,
2278                                 &src, &dst,
2279                                 &ack_seq, &seq) != 0) {
2280                 /* probably a non-tcp ACK packet */
2281                 return;
2282         }
2283
2284         /* check if we have this guy in our list of connections
2285            to kill
2286         */
2287         con = trbt_lookuparray32(killtcp->connections, 
2288                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2289         if (con == NULL) {
2290                 /* no this was some other packet we can just ignore */
2291                 return;
2292         }
2293
2294         /* This one has been tickled !
2295            now reset him and remove him from the list.
2296          */
2297         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2298                 ntohs(con->dst_addr.ip.sin_port),
2299                 ctdb_addr_to_str(&con->src_addr),
2300                 ntohs(con->src_addr.ip.sin_port)));
2301
2302         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2303         talloc_free(con);
2304 }
2305
2306
2307 /* when traversing the list of all tcp connections to send tickle acks to
2308    (so that we can capture the ack coming back and kill the connection
2309     by a RST)
2310    this callback is called for each connection we are currently trying to kill
2311 */
2312 static void tickle_connection_traverse(void *param, void *data)
2313 {
2314         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2315
2316         /* have tried too many times, just give up */
2317         if (con->count >= 5) {
2318                 /* can't delete in traverse: reparent to delete_cons */
2319                 talloc_steal(param, con);
2320                 return;
2321         }
2322
2323         /* othervise, try tickling it again */
2324         con->count++;
2325         ctdb_sys_send_tcp(
2326                 (ctdb_sock_addr *)&con->dst_addr,
2327                 (ctdb_sock_addr *)&con->src_addr,
2328                 0, 0, 0);
2329 }
2330
2331
2332 /* 
2333    called every second until all sentenced connections have been reset
2334  */
2335 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2336                                               struct timeval t, void *private_data)
2337 {
2338         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2339         void *delete_cons = talloc_new(NULL);
2340
2341         /* loop over all connections sending tickle ACKs */
2342         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2343
2344         /* now we've finished traverse, it's safe to do deletion. */
2345         talloc_free(delete_cons);
2346
2347         /* If there are no more connections to kill we can remove the
2348            entire killtcp structure
2349          */
2350         if ( (killtcp->connections == NULL) || 
2351              (killtcp->connections->root == NULL) ) {
2352                 talloc_free(killtcp);
2353                 return;
2354         }
2355
2356         /* try tickling them again in a seconds time
2357          */
2358         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2359                         ctdb_tickle_sentenced_connections, killtcp);
2360 }
2361
2362 /*
2363   destroy the killtcp structure
2364  */
2365 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2366 {
2367         if (killtcp->vnn) {
2368                 killtcp->vnn->killtcp = NULL;
2369         }
2370         return 0;
2371 }
2372
2373
/* insert callback for the killtcp tree: whatever connection structure
   may already be stored is unconditionally replaced by the new one.

   'parm' is the new structure and 'data' the existing one. the old one
   is deliberately not freed here; as noted by the original author it is
   talloc_stolen by the same node in the tree and goes away when the new
   data is deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        return replacement;
}
2385
2386 /*
2387   add a tcp socket to the list of connections we want to RST
2388  */
2389 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2390                                        ctdb_sock_addr *s,
2391                                        ctdb_sock_addr *d)
2392 {
2393         ctdb_sock_addr src, dst;
2394         struct ctdb_kill_tcp *killtcp;
2395         struct ctdb_killtcp_con *con;
2396         struct ctdb_vnn *vnn;
2397
2398         ctdb_canonicalize_ip(s, &src);
2399         ctdb_canonicalize_ip(d, &dst);
2400
2401         vnn = find_public_ip_vnn(ctdb, &dst);
2402         if (vnn == NULL) {
2403                 vnn = find_public_ip_vnn(ctdb, &src);
2404         }
2405         if (vnn == NULL) {
2406                 /* if it is not a public ip   it could be our 'single ip' */
2407                 if (ctdb->single_ip_vnn) {
2408                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2409                                 vnn = ctdb->single_ip_vnn;
2410                         }
2411                 }
2412         }
2413         if (vnn == NULL) {
2414                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2415                 return -1;
2416         }
2417
2418         killtcp = vnn->killtcp;
2419         
2420         /* If this is the first connection to kill we must allocate
2421            a new structure
2422          */
2423         if (killtcp == NULL) {
2424                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2425                 CTDB_NO_MEMORY(ctdb, killtcp);
2426
2427                 killtcp->vnn         = vnn;
2428                 killtcp->ctdb        = ctdb;
2429                 killtcp->capture_fd  = -1;
2430                 killtcp->connections = trbt_create(killtcp, 0);
2431
2432                 vnn->killtcp         = killtcp;
2433                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2434         }
2435
2436
2437
2438         /* create a structure that describes this connection we want to
2439            RST and store it in killtcp->connections
2440         */
2441         con = talloc(killtcp, struct ctdb_killtcp_con);
2442         CTDB_NO_MEMORY(ctdb, con);
2443         con->src_addr = src;
2444         con->dst_addr = dst;
2445         con->count    = 0;
2446         con->killtcp  = killtcp;
2447
2448
2449         trbt_insertarray32_callback(killtcp->connections,
2450                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2451                         add_killtcp_callback, con);
2452
2453         /* 
2454            If we dont have a socket to listen on yet we must create it
2455          */
2456         if (killtcp->capture_fd == -1) {
2457                 const char *iface = ctdb_vnn_iface_string(vnn);
2458                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2459                 if (killtcp->capture_fd == -1) {
2460                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2461                                           "socket on iface '%s' for killtcp (%s)\n",
2462                                           iface, strerror(errno)));
2463                         goto failed;
2464                 }
2465         }
2466
2467
2468         if (killtcp->fde == NULL) {
2469                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2470                                             EVENT_FD_READ,
2471                                             capture_tcp_handler, killtcp);
2472                 tevent_fd_set_auto_close(killtcp->fde);
2473
2474                 /* We also need to set up some events to tickle all these connections
2475                    until they are all reset
2476                 */
2477                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2478                                 ctdb_tickle_sentenced_connections, killtcp);
2479         }
2480
2481         /* tickle him once now */
2482         ctdb_sys_send_tcp(
2483                 &con->dst_addr,
2484                 &con->src_addr,
2485                 0, 0, 0);
2486
2487         return 0;
2488
2489 failed:
2490         talloc_free(vnn->killtcp);
2491         vnn->killtcp = NULL;
2492         return -1;
2493 }
2494
2495 /*
2496   kill a TCP connection.
2497  */
2498 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2499 {
2500         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2501
2502         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2503 }
2504
2505 /*
2506   called by a daemon to inform us of the entire list of TCP tickles for
2507   a particular public address.
2508   this control should only be sent by the node that is currently serving
2509   that public address.
2510  */
2511 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2512 {
2513         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2514         struct ctdb_tcp_array *tcparray;
2515         struct ctdb_vnn *vnn;
2516
2517         /* We must at least have tickles.num or else we cant verify the size
2518            of the received data blob
2519          */
2520         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2521                                         tickles.connections)) {
2522                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2523                 return -1;
2524         }
2525
2526         /* verify that the size of data matches what we expect */
2527         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2528                                 tickles.connections)
2529                          + sizeof(struct ctdb_tcp_connection)
2530                                  * list->tickles.num) {
2531                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2532                 return -1;
2533         }       
2534
2535         vnn = find_public_ip_vnn(ctdb, &list->addr);
2536         if (vnn == NULL) {
2537                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2538                         ctdb_addr_to_str(&list->addr)));
2539
2540                 return 1;
2541         }
2542
2543         /* remove any old ticklelist we might have */
2544         talloc_free(vnn->tcp_array);
2545         vnn->tcp_array = NULL;
2546
2547         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2548         CTDB_NO_MEMORY(ctdb, tcparray);
2549
2550         tcparray->num = list->tickles.num;
2551
2552         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2553         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2554
2555         memcpy(tcparray->connections, &list->tickles.connections[0], 
2556                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2557
2558         /* We now have a new fresh tickle list array for this vnn */
2559         vnn->tcp_array = talloc_steal(vnn, tcparray);
2560         
2561         return 0;
2562 }
2563
2564 /*
2565   called to return the full list of tickles for the puclic address associated 
2566   with the provided vnn
2567  */
2568 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2569 {
2570         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2571         struct ctdb_control_tcp_tickle_list *list;
2572         struct ctdb_tcp_array *tcparray;
2573         int num;
2574         struct ctdb_vnn *vnn;
2575
2576         vnn = find_public_ip_vnn(ctdb, addr);
2577         if (vnn == NULL) {
2578                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2579                         ctdb_addr_to_str(addr)));
2580
2581                 return 1;
2582         }
2583
2584         tcparray = vnn->tcp_array;
2585         if (tcparray) {
2586                 num = tcparray->num;
2587         } else {
2588                 num = 0;
2589         }
2590
2591         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2592                                 tickles.connections)
2593                         + sizeof(struct ctdb_tcp_connection) * num;
2594
2595         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2596         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2597         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2598
2599         list->addr = *addr;
2600         list->tickles.num = num;
2601         if (num) {
2602                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2603                         sizeof(struct ctdb_tcp_connection) * num);
2604         }
2605
2606         return 0;
2607 }
2608
2609
2610 /*
2611   set the list of all tcp tickles for a public address
2612  */
2613 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2614                               struct timeval timeout, uint32_t destnode, 
2615                               ctdb_sock_addr *addr,
2616                               struct ctdb_tcp_array *tcparray)
2617 {
2618         int ret, num;
2619         TDB_DATA data;
2620         struct ctdb_control_tcp_tickle_list *list;
2621
2622         if (tcparray) {
2623                 num = tcparray->num;
2624         } else {
2625                 num = 0;
2626         }
2627
2628         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2629                                 tickles.connections) +
2630                         sizeof(struct ctdb_tcp_connection) * num;
2631         data.dptr = talloc_size(ctdb, data.dsize);
2632         CTDB_NO_MEMORY(ctdb, data.dptr);
2633
2634         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2635         list->addr = *addr;
2636         list->tickles.num = num;
2637         if (tcparray) {
2638                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2639         }
2640
2641         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2642                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2643                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2644         if (ret != 0) {
2645                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2646                 return -1;
2647         }
2648
2649         talloc_free(data.dptr);
2650
2651         return ret;
2652 }
2653
2654
2655 /*
2656   perform tickle updates if required
2657  */
2658 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2659                                 struct timed_event *te, 
2660                                 struct timeval t, void *private_data)
2661 {
2662         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2663         int ret;
2664         struct ctdb_vnn *vnn;
2665
2666         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2667                 /* we only send out updates for public addresses that 
2668                    we have taken over
2669                  */
2670                 if (ctdb->pnn != vnn->pnn) {
2671                         continue;
2672                 }
2673                 /* We only send out the updates if we need to */
2674                 if (!vnn->tcp_update_needed) {
2675                         continue;
2676                 }
2677                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2678                                 TAKEOVER_TIMEOUT(),
2679                                 CTDB_BROADCAST_CONNECTED,
2680                                 &vnn->public_address,
2681                                 vnn->tcp_array);
2682                 if (ret != 0) {
2683                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2684                                 ctdb_addr_to_str(&vnn->public_address)));
2685                 }
2686         }
2687
2688         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2689                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2690                              ctdb_update_tcp_tickles, ctdb);
2691 }               
2692         
2693
2694 /*
2695   start periodic update of tcp tickles
2696  */
2697 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2698 {
2699         ctdb->tickle_update_context = talloc_new(ctdb);
2700
2701         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2702                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2703                              ctdb_update_tcp_tickles, ctdb);
2704 }
2705
2706
2707
2708
2709 struct control_gratious_arp {
2710         struct ctdb_context *ctdb;
2711         ctdb_sock_addr addr;
2712         const char *iface;
2713         int count;
2714 };
2715
2716 /*
2717   send a control_gratuitous arp
2718  */
2719 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2720                                   struct timeval t, void *private_data)
2721 {
2722         int ret;
2723         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2724                                                         struct control_gratious_arp);
2725
2726         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2727         if (ret != 0) {
2728                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2729                                  arp->iface, strerror(errno)));
2730         }
2731
2732
2733         arp->count++;
2734         if (arp->count == CTDB_ARP_REPEAT) {
2735                 talloc_free(arp);
2736                 return;
2737         }
2738
2739         event_add_timed(arp->ctdb->ev, arp, 
2740                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2741                         send_gratious_arp, arp);
2742 }
2743
2744
2745 /*
2746   send a gratious arp 
2747  */
2748 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2749 {
2750         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2751         struct control_gratious_arp *arp;
2752
2753         /* verify the size of indata */
2754         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2755                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2756                                  (unsigned)indata.dsize, 
2757                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2758                 return -1;
2759         }
2760         if (indata.dsize != 
2761                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2762                 + gratious_arp->len ) ){
2763
2764                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2765                         "but should be %u bytes\n", 
2766                          (unsigned)indata.dsize, 
2767                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2768                 return -1;
2769         }
2770
2771
2772         arp = talloc(ctdb, struct control_gratious_arp);
2773         CTDB_NO_MEMORY(ctdb, arp);
2774
2775         arp->ctdb  = ctdb;
2776         arp->addr   = gratious_arp->addr;
2777         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2778         CTDB_NO_MEMORY(ctdb, arp->iface);
2779         arp->count = 0;
2780         
2781         event_add_timed(arp->ctdb->ev, arp, 
2782                         timeval_zero(), send_gratious_arp, arp);
2783
2784         return 0;
2785 }
2786
2787 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2788 {
2789         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2790         int ret;
2791
2792         /* verify the size of indata */
2793         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2794                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2795                 return -1;
2796         }
2797         if (indata.dsize != 
2798                 ( offsetof(struct ctdb_control_ip_iface, iface)
2799                 + pub->len ) ){
2800
2801                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2802                         "but should be %u bytes\n", 
2803                          (unsigned)indata.dsize, 
2804                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2805                 return -1;
2806         }
2807
2808         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2809
2810         if (ret != 0) {
2811                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2812                 return -1;
2813         }
2814
2815         return 0;
2816 }
2817
2818 /*
2819   called when releaseip event finishes for del_public_address
2820  */
2821 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2822                                 void *private_data)
2823 {
2824         talloc_free(private_data);
2825 }
2826
2827 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2828 {
2829         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2830         struct ctdb_vnn *vnn;
2831         int ret;
2832
2833         /* verify the size of indata */
2834         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2835                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2836                 return -1;
2837         }
2838         if (indata.dsize != 
2839                 ( offsetof(struct ctdb_control_ip_iface, iface)
2840                 + pub->len ) ){
2841
2842                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2843                         "but should be %u bytes\n", 
2844                          (unsigned)indata.dsize, 
2845                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2846                 return -1;
2847         }
2848
2849         /* walk over all public addresses until we find a match */
2850         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2851                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2852                         TALLOC_CTX *mem_ctx;
2853
2854                         DLIST_REMOVE(ctdb->vnn, vnn);
2855                         if (vnn->iface == NULL) {
2856                                 talloc_free(vnn);
2857                                 return 0;
2858                         }
2859
2860                         mem_ctx = talloc_new(ctdb);
2861                         ret = ctdb_event_script_callback(ctdb, 
2862                                          mem_ctx, delete_ip_callback, mem_ctx,
2863                                          false,
2864                                          CTDB_EVENT_RELEASE_IP,
2865                                          "%s %s %u",
2866                                          ctdb_vnn_iface_string(vnn),
2867                                          ctdb_addr_to_str(&vnn->public_address),
2868                                          vnn->public_netmask_bits);
2869                         ctdb_vnn_unassign_iface(ctdb, vnn);
2870                         talloc_free(vnn);
2871                         if (ret != 0) {
2872                                 return -1;
2873                         }
2874                         return 0;
2875                 }
2876         }
2877
2878         return -1;
2879 }
2880
2881 /* This function is called from the recovery daemon to verify that a remote
2882    node has the expected ip allocation.
2883    This is verified against ctdb->ip_tree
2884 */
2885 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2886 {
2887         struct ctdb_public_ip_list *tmp_ip; 
2888         int i;
2889
2890         if (ctdb->ip_tree == NULL) {
2891                 /* dont know the expected allocation yet, assume remote node
2892                    is correct. */
2893                 return 0;
2894         }
2895
2896         if (ips == NULL) {
2897                 return 0;
2898         }
2899
2900         for (i=0; i<ips->num; i++) {
2901                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2902                 if (tmp_ip == NULL) {
2903                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2904                         return -1;
2905                 }
2906
2907                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2908                         continue;
2909                 }
2910
2911                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2912                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2913                         return -1;
2914                 }
2915         }
2916
2917         return 0;
2918 }
2919
2920 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2921 {
2922         struct ctdb_public_ip_list *tmp_ip; 
2923
2924         if (ctdb->ip_tree == NULL) {
2925                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2926                 return -1;
2927         }
2928
2929         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2930         if (tmp_ip == NULL) {
2931                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2932                 return -1;
2933         }
2934
2935         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2936         tmp_ip->pnn = ip->pnn;
2937
2938         return 0;
2939 }