idtree: fix right shift of signed ints, crash on large ids on AIX
[metze/ctdb/wip.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* timeout built from the takeover_timeout tunable; expands to an expression
   that needs a variable called 'ctdb' in scope at the point of use */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* first argument given to timeval_current_ofs() when the arp timer is re-armed */
#define CTDB_ARP_INTERVAL 1
/* ctdb_control_send_arp() frees its state once it has run this many times */
#define CTDB_ARP_REPEAT   3
35
/*
  one local network interface, kept in the ctdb->ifaces list
 */
struct ctdb_iface {
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name; lookups compare it with strcmp() */
        const char *name;
        /* set to false when the entry is created */
        bool link_up;
        /* incremented when a vnn is assigned to this interface,
           decremented when it is unassigned */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
        /* control request that is answered from the callback */
        struct ctdb_req_control *c;
        /* vnn whose address is being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334         TDB_DATA data;
335
336         if (status != 0) {
337                 if (status == -ETIME) {
338                         ctdb_ban_self(ctdb);
339                 }
340                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341                                  ctdb_addr_to_str(&state->vnn->public_address),
342                                  ctdb_vnn_iface_string(state->vnn)));
343                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
344                 talloc_free(state);
345                 return;
346         }
347
348         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
349         if (ret != 0) {
350                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
351                 talloc_free(state);
352                 return;
353         }
354
355         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356         data.dsize = strlen((char *)data.dptr) + 1;
357         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
358
359         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
360
361
362         /* the control succeeded */
363         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
364         talloc_free(state);
365         return;
366 }
367
368 /*
369   take over an ip address
370  */
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372                               struct ctdb_req_control *c,
373                               struct ctdb_vnn *vnn)
374 {
375         int ret;
376         struct ctdb_do_takeip_state *state;
377
378         ret = ctdb_vnn_assign_iface(ctdb, vnn);
379         if (ret != 0) {
380                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381                                  "assin a usable interface\n",
382                                  ctdb_addr_to_str(&vnn->public_address),
383                                  vnn->public_netmask_bits));
384                 return -1;
385         }
386
387         state = talloc(vnn, struct ctdb_do_takeip_state);
388         CTDB_NO_MEMORY(ctdb, state);
389
390         state->c = talloc_steal(ctdb, c);
391         state->vnn   = vnn;
392
393         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394                             ctdb_addr_to_str(&vnn->public_address),
395                             vnn->public_netmask_bits,
396                             ctdb_vnn_iface_string(vnn)));
397
398         ret = ctdb_event_script_callback(ctdb,
399                                          state,
400                                          ctdb_do_takeip_callback,
401                                          state,
402                                          false,
403                                          CTDB_EVENT_TAKE_IP,
404                                          "%s %s %u",
405                                          ctdb_vnn_iface_string(vnn),
406                                          ctdb_addr_to_str(&vnn->public_address),
407                                          vnn->public_netmask_bits);
408
409         if (ret != 0) {
410                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411                         ctdb_addr_to_str(&vnn->public_address),
412                         ctdb_vnn_iface_string(vnn)));
413                 talloc_free(state);
414                 return -1;
415         }
416
417         return 0;
418 }
419
/*
  state handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
        /* control request that is answered from the callback */
        struct ctdb_req_control *c;
        /* interface the address is being moved away from */
        struct ctdb_iface *old;
        /* vnn whose address is being moved */
        struct ctdb_vnn *vnn;
};
425
426 /*
427   called when updateip event finishes
428  */
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
430                                       void *private_data)
431 {
432         struct ctdb_do_updateip_state *state =
433                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
434         int32_t ret;
435
436         if (status != 0) {
437                 if (status == -ETIME) {
438                         ctdb_ban_self(ctdb);
439                 }
440                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441                         ctdb_addr_to_str(&state->vnn->public_address),
442                         state->old->name,
443                         ctdb_vnn_iface_string(state->vnn)));
444
445                 /*
446                  * All we can do is reset the old interface
447                  * and let the next run fix it
448                  */
449                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450                 state->vnn->iface = state->old;
451                 state->vnn->iface->references++;
452
453                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
454                 talloc_free(state);
455                 return;
456         }
457
458         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
459         if (ret != 0) {
460                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
461                 talloc_free(state);
462                 return;
463         }
464
465         /* the control succeeded */
466         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
467         talloc_free(state);
468         return;
469 }
470
471 /*
472   update (move) an ip address
473  */
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475                                 struct ctdb_req_control *c,
476                                 struct ctdb_vnn *vnn)
477 {
478         int ret;
479         struct ctdb_do_updateip_state *state;
480         struct ctdb_iface *old = vnn->iface;
481
482         ctdb_vnn_unassign_iface(ctdb, vnn);
483         ret = ctdb_vnn_assign_iface(ctdb, vnn);
484         if (ret != 0) {
485                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486                                  "assin a usable interface (old iface '%s')\n",
487                                  ctdb_addr_to_str(&vnn->public_address),
488                                  vnn->public_netmask_bits,
489                                  old->name));
490                 return -1;
491         }
492
493         if (vnn->iface == old) {
494                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
495                                  "assin a same interface '%s'\n",
496                                  ctdb_addr_to_str(&vnn->public_address),
497                                  vnn->public_netmask_bits,
498                                  old->name));
499                 return -1;
500         }
501
502         state = talloc(vnn, struct ctdb_do_updateip_state);
503         CTDB_NO_MEMORY(ctdb, state);
504
505         state->c = talloc_steal(ctdb, c);
506         state->old = old;
507         state->vnn = vnn;
508
509         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
510                             "interface %s to %s\n",
511                             ctdb_addr_to_str(&vnn->public_address),
512                             vnn->public_netmask_bits,
513                             old->name,
514                             ctdb_vnn_iface_string(vnn)));
515
516         ret = ctdb_event_script_callback(ctdb,
517                                          state,
518                                          ctdb_do_updateip_callback,
519                                          state,
520                                          false,
521                                          CTDB_EVENT_UPDATE_IP,
522                                          "%s %s %s %u",
523                                          state->old->name,
524                                          ctdb_vnn_iface_string(vnn),
525                                          ctdb_addr_to_str(&vnn->public_address),
526                                          vnn->public_netmask_bits);
527         if (ret != 0) {
528                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
529                                  ctdb_addr_to_str(&vnn->public_address),
530                                  old->name, ctdb_vnn_iface_string(vnn)));
531                 talloc_free(state);
532                 return -1;
533         }
534
535         return 0;
536 }
537
538 /*
539   Find the vnn of the node that has a public ip address
540   returns -1 if the address is not known as a public address
541  */
542 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
543 {
544         struct ctdb_vnn *vnn;
545
546         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
547                 if (ctdb_same_ip(&vnn->public_address, addr)) {
548                         return vnn;
549                 }
550         }
551
552         return NULL;
553 }
554
555 /*
556   take over an ip address
557  */
558 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
559                                  struct ctdb_req_control *c,
560                                  TDB_DATA indata,
561                                  bool *async_reply)
562 {
563         int ret;
564         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
565         struct ctdb_vnn *vnn;
566         bool have_ip = false;
567         bool do_updateip = false;
568         bool do_takeip = false;
569         struct ctdb_iface *best_iface = NULL;
570
571         if (pip->pnn != ctdb->pnn) {
572                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
573                                  "with pnn %d, but we're node %d\n",
574                                  ctdb_addr_to_str(&pip->addr),
575                                  pip->pnn, ctdb->pnn));
576                 return -1;
577         }
578
579         /* update out vnn list */
580         vnn = find_public_ip_vnn(ctdb, &pip->addr);
581         if (vnn == NULL) {
582                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
583                         ctdb_addr_to_str(&pip->addr)));
584                 return 0;
585         }
586
587         have_ip = ctdb_sys_have_ip(&pip->addr);
588         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
589         if (best_iface == NULL) {
590                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
591                                  "a usable interface (old %s, have_ip %d)\n",
592                                  ctdb_addr_to_str(&vnn->public_address),
593                                  vnn->public_netmask_bits,
594                                  ctdb_vnn_iface_string(vnn),
595                                  have_ip));
596                 return -1;
597         }
598
599         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
600                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
601                 have_ip = false;
602         }
603
604         if (vnn->iface == NULL && have_ip) {
605                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606                                   "but we have no interface assigned, has someone manually configured it?"
607                                   "banning ourself\n",
608                                  ctdb_addr_to_str(&vnn->public_address)));
609                 ctdb_ban_self(ctdb);
610                 return -1;
611         }
612
613         if (vnn->pnn != ctdb->pnn && have_ip) {
614                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
615                                   "and we have it on iface[%s], but it was assigned to node %d"
616                                   "and we are node %d, banning ourself\n",
617                                  ctdb_addr_to_str(&vnn->public_address),
618                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
619                 ctdb_ban_self(ctdb);
620                 return -1;
621         }
622
623         if (vnn->iface) {
624                 if (vnn->iface->link_up) {
625                         /* only move when the rebalance gains something */
626                         if (vnn->iface->references > (best_iface->references + 1)) {
627                                 do_updateip = true;
628                         }
629                 } else if (vnn->iface != best_iface) {
630                         do_updateip = true;
631                 }
632         }
633
634         if (!have_ip) {
635                 if (do_updateip) {
636                         ctdb_vnn_unassign_iface(ctdb, vnn);
637                         do_updateip = false;
638                 }
639                 do_takeip = true;
640         }
641
642         if (do_takeip) {
643                 ret = ctdb_do_takeip(ctdb, c, vnn);
644                 if (ret != 0) {
645                         return -1;
646                 }
647         } else if (do_updateip) {
648                 ret = ctdb_do_updateip(ctdb, c, vnn);
649                 if (ret != 0) {
650                         return -1;
651                 }
652         } else {
653                 /*
654                  * The interface is up and the kernel known the ip
655                  * => do nothing
656                  */
657                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
658                         ctdb_addr_to_str(&pip->addr),
659                         vnn->public_netmask_bits,
660                         ctdb_vnn_iface_string(vnn)));
661                 return 0;
662         }
663
664         /* tell ctdb_control.c that we will be replying asynchronously */
665         *async_reply = true;
666
667         return 0;
668 }
669
670 /*
671   takeover an ip address old v4 style
672  */
673 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
674                                 struct ctdb_req_control *c,
675                                 TDB_DATA indata, 
676                                 bool *async_reply)
677 {
678         TDB_DATA data;
679         
680         data.dsize = sizeof(struct ctdb_public_ip);
681         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
682         CTDB_NO_MEMORY(ctdb, data.dptr);
683         
684         memcpy(data.dptr, indata.dptr, indata.dsize);
685         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
686 }
687
688 /*
689   kill any clients that are registered with a IP that is being released
690  */
691 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
692 {
693         struct ctdb_client_ip *ip;
694
695         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
696                 ctdb_addr_to_str(addr)));
697
698         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
699                 ctdb_sock_addr tmp_addr;
700
701                 tmp_addr = ip->addr;
702                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
703                         ip->client_id,
704                         ctdb_addr_to_str(&ip->addr)));
705
706                 if (ctdb_same_ip(&tmp_addr, addr)) {
707                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
708                                                                      ip->client_id, 
709                                                                      struct ctdb_client);
710                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
711                                 ip->client_id,
712                                 ctdb_addr_to_str(&ip->addr),
713                                 client->pid));
714
715                         if (client->pid != 0) {
716                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
717                                         (unsigned)client->pid,
718                                         ctdb_addr_to_str(addr),
719                                         ip->client_id));
720                                 kill(client->pid, SIGKILL);
721                         }
722                 }
723         }
724 }
725
726 /*
727   called when releaseip event finishes
728  */
729 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
730                                 void *private_data)
731 {
732         struct takeover_callback_state *state = 
733                 talloc_get_type(private_data, struct takeover_callback_state);
734         TDB_DATA data;
735
736         if (status == -ETIME) {
737                 ctdb_ban_self(ctdb);
738         }
739
740         /* send a message to all clients of this node telling them
741            that the cluster has been reconfigured and they should
742            release any sockets on this IP */
743         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
744         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
745         data.dsize = strlen((char *)data.dptr)+1;
746
747         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
748
749         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
750
751         /* kill clients that have registered with this IP */
752         release_kill_clients(ctdb, state->addr);
753
754         ctdb_vnn_unassign_iface(ctdb, state->vnn);
755
756         /* the control succeeded */
757         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
758         talloc_free(state);
759 }
760
761 /*
762   release an ip address
763  */
764 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
765                                 struct ctdb_req_control *c,
766                                 TDB_DATA indata, 
767                                 bool *async_reply)
768 {
769         int ret;
770         struct takeover_callback_state *state;
771         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
772         struct ctdb_vnn *vnn;
773
774         /* update our vnn list */
775         vnn = find_public_ip_vnn(ctdb, &pip->addr);
776         if (vnn == NULL) {
777                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
778                         ctdb_addr_to_str(&pip->addr)));
779                 return 0;
780         }
781         vnn->pnn = pip->pnn;
782
783         /* stop any previous arps */
784         talloc_free(vnn->takeover_ctx);
785         vnn->takeover_ctx = NULL;
786
787         if (!ctdb_sys_have_ip(&pip->addr)) {
788                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
789                         ctdb_addr_to_str(&pip->addr),
790                         vnn->public_netmask_bits, 
791                         ctdb_vnn_iface_string(vnn)));
792                 ctdb_vnn_unassign_iface(ctdb, vnn);
793                 return 0;
794         }
795
796         if (vnn->iface == NULL) {
797                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
798                                   "but we have no interface assigned, has someone manually configured it?"
799                                   "banning ourself\n",
800                                  ctdb_addr_to_str(&vnn->public_address)));
801                 ctdb_ban_self(ctdb);
802                 return -1;
803         }
804
805         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
806                 ctdb_addr_to_str(&pip->addr),
807                 vnn->public_netmask_bits, 
808                 ctdb_vnn_iface_string(vnn),
809                 pip->pnn));
810
811         state = talloc(ctdb, struct takeover_callback_state);
812         CTDB_NO_MEMORY(ctdb, state);
813
814         state->c = talloc_steal(state, c);
815         state->addr = talloc(state, ctdb_sock_addr);       
816         CTDB_NO_MEMORY(ctdb, state->addr);
817         *state->addr = pip->addr;
818         state->vnn   = vnn;
819
820         ret = ctdb_event_script_callback(ctdb, 
821                                          state, release_ip_callback, state,
822                                          false,
823                                          CTDB_EVENT_RELEASE_IP,
824                                          "%s %s %u",
825                                          ctdb_vnn_iface_string(vnn),
826                                          ctdb_addr_to_str(&pip->addr),
827                                          vnn->public_netmask_bits);
828         if (ret != 0) {
829                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
830                         ctdb_addr_to_str(&pip->addr),
831                         ctdb_vnn_iface_string(vnn)));
832                 talloc_free(state);
833                 return -1;
834         }
835
836         /* tell the control that we will be reply asynchronously */
837         *async_reply = true;
838         return 0;
839 }
840
841 /*
842   release an ip address old v4 style
843  */
844 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
845                                 struct ctdb_req_control *c,
846                                 TDB_DATA indata, 
847                                 bool *async_reply)
848 {
849         TDB_DATA data;
850         
851         data.dsize = sizeof(struct ctdb_public_ip);
852         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
853         CTDB_NO_MEMORY(ctdb, data.dptr);
854         
855         memcpy(data.dptr, indata.dptr, indata.dsize);
856         return ctdb_control_release_ip(ctdb, c, data, async_reply);
857 }
858
859
860 static int ctdb_add_public_address(struct ctdb_context *ctdb,
861                                    ctdb_sock_addr *addr,
862                                    unsigned mask, const char *ifaces)
863 {
864         struct ctdb_vnn      *vnn;
865         uint32_t num = 0;
866         char *tmp;
867         const char *iface;
868         int i;
869         int ret;
870
871         /* Verify that we dont have an entry for this ip yet */
872         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
873                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
874                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
875                                 ctdb_addr_to_str(addr)));
876                         return -1;
877                 }               
878         }
879
880         /* create a new vnn structure for this ip address */
881         vnn = talloc_zero(ctdb, struct ctdb_vnn);
882         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
883         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
884         tmp = talloc_strdup(vnn, ifaces);
885         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
886         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
887                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
888                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
889                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
890                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
891                 num++;
892         }
893         talloc_free(tmp);
894         vnn->ifaces[num] = NULL;
895         vnn->public_address      = *addr;
896         vnn->public_netmask_bits = mask;
897         vnn->pnn                 = -1;
898
899         for (i=0; vnn->ifaces[i]; i++) {
900                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
901                 if (ret != 0) {
902                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
903                                            "for public_address[%s]\n",
904                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
905                         talloc_free(vnn);
906                         return -1;
907                 }
908         }
909
910         DLIST_ADD(ctdb->vnn, vnn);
911
912         return 0;
913 }
914
915 /*
916   setup the event script directory
917 */
918 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
919 {
920         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
921         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
922         return 0;
923 }
924
925 /*
926   setup the public address lists from a file
927 */
928 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
929 {
930         char **lines;
931         int nlines;
932         int i;
933
934         lines = file_lines_load(alist, &nlines, ctdb);
935         if (lines == NULL) {
936                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
937                 return -1;
938         }
939         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
940                 nlines--;
941         }
942
943         for (i=0;i<nlines;i++) {
944                 unsigned mask;
945                 ctdb_sock_addr addr;
946                 const char *addrstr;
947                 const char *ifaces;
948                 char *tok, *line;
949
950                 line = lines[i];
951                 while ((*line == ' ') || (*line == '\t')) {
952                         line++;
953                 }
954                 if (*line == '#') {
955                         continue;
956                 }
957                 if (strcmp(line, "") == 0) {
958                         continue;
959                 }
960                 tok = strtok(line, " \t");
961                 addrstr = tok;
962                 tok = strtok(NULL, " \t");
963                 if (tok == NULL) {
964                         if (NULL == ctdb->default_public_interface) {
965                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
966                                          i+1));
967                                 talloc_free(lines);
968                                 return -1;
969                         }
970                         ifaces = ctdb->default_public_interface;
971                 } else {
972                         ifaces = tok;
973                 }
974
975                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
976                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
977                         talloc_free(lines);
978                         return -1;
979                 }
980                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
981                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
982                         talloc_free(lines);
983                         return -1;
984                 }
985         }
986
987         talloc_free(lines);
988         return 0;
989 }
990
991 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
992                               const char *iface,
993                               const char *ip)
994 {
995         struct ctdb_vnn *svnn;
996         bool ok;
997         int ret;
998
999         svnn = talloc_zero(ctdb, struct ctdb_vnn);
1000         CTDB_NO_MEMORY(ctdb, svnn);
1001
1002         svnn->ifaces = talloc_array(svnn, const char *, 2);
1003         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1004         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1005         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1006         svnn->ifaces[1] = NULL;
1007
1008         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1009         if (!ok) {
1010                 talloc_free(svnn);
1011                 return -1;
1012         }
1013
1014         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1015         if (ret != 0) {
1016                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1017                                    "for single_ip[%s]\n",
1018                                    svnn->ifaces[0],
1019                                    ctdb_addr_to_str(&svnn->public_address)));
1020                 talloc_free(svnn);
1021                 return -1;
1022         }
1023
1024         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1025         if (ret != 0) {
1026                 talloc_free(svnn);
1027                 return -1;
1028         }
1029
1030         ctdb->single_ip_vnn = svnn;
1031         return 0;
1032 }
1033
1034 struct ctdb_public_ip_list {
1035         struct ctdb_public_ip_list *next;
1036         uint32_t pnn;
1037         ctdb_sock_addr addr;
1038 };
1039
1040
1041 /* Given a physical node, return the number of
1042    public addresses that is currently assigned to this node.
1043 */
1044 static int node_ip_coverage(struct ctdb_context *ctdb, 
1045         int32_t pnn,
1046         struct ctdb_public_ip_list *ips)
1047 {
1048         int num=0;
1049
1050         for (;ips;ips=ips->next) {
1051                 if (ips->pnn == pnn) {
1052                         num++;
1053                 }
1054         }
1055         return num;
1056 }
1057
1058
1059 /* Check if this is a public ip known to the node, i.e. can that
1060    node takeover this ip ?
1061 */
1062 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1063                 struct ctdb_public_ip_list *ip)
1064 {
1065         struct ctdb_all_public_ips *public_ips;
1066         int i;
1067
1068         public_ips = ctdb->nodes[pnn]->available_public_ips;
1069
1070         if (public_ips == NULL) {
1071                 return -1;
1072         }
1073
1074         for (i=0;i<public_ips->num;i++) {
1075                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1076                         /* yes, this node can serve this public ip */
1077                         return 0;
1078                 }
1079         }
1080
1081         return -1;
1082 }
1083
1084
1085 /* search the node lists list for a node to takeover this ip.
1086    pick the node that currently are serving the least number of ips
1087    so that the ips get spread out evenly.
1088 */
1089 static int find_takeover_node(struct ctdb_context *ctdb, 
1090                 struct ctdb_node_map *nodemap, uint32_t mask, 
1091                 struct ctdb_public_ip_list *ip,
1092                 struct ctdb_public_ip_list *all_ips)
1093 {
1094         int pnn, min=0, num;
1095         int i;
1096
1097         pnn    = -1;
1098         for (i=0;i<nodemap->num;i++) {
1099                 if (nodemap->nodes[i].flags & mask) {
1100                         /* This node is not healty and can not be used to serve
1101                            a public address 
1102                         */
1103                         continue;
1104                 }
1105
1106                 /* verify that this node can serve this ip */
1107                 if (can_node_serve_ip(ctdb, i, ip)) {
1108                         /* no it couldnt   so skip to the next node */
1109                         continue;
1110                 }
1111
1112                 num = node_ip_coverage(ctdb, i, all_ips);
1113                 /* was this the first node we checked ? */
1114                 if (pnn == -1) {
1115                         pnn = i;
1116                         min  = num;
1117                 } else {
1118                         if (num < min) {
1119                                 pnn = i;
1120                                 min  = num;
1121                         }
1122                 }
1123         }       
1124         if (pnn == -1) {
1125                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1126                         ctdb_addr_to_str(&ip->addr)));
1127
1128                 return -1;
1129         }
1130
1131         ip->pnn = pnn;
1132         return 0;
1133 }
1134
1135 #define IP_KEYLEN       4
1136 static uint32_t *ip_key(ctdb_sock_addr *ip)
1137 {
1138         static uint32_t key[IP_KEYLEN];
1139
1140         bzero(key, sizeof(key));
1141
1142         switch (ip->sa.sa_family) {
1143         case AF_INET:
1144                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1145                 break;
1146         case AF_INET6:
1147                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1148                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1149                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1150                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1151                 break;
1152         default:
1153                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1154                 return key;
1155         }
1156
1157         return key;
1158 }
1159
/*
  Insert callback passed to trbt_insertarray32_callback(): it hands back
  'parm' unchanged and does not look at 'data'.
 */
static void *add_ip_callback(void *parm, void *data)
{
        void *result = parm;

        return result;
}
1164
1165 void getips_count_callback(void *param, void *data)
1166 {
1167         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1168         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1169
1170         new_ip->next = *ip_list;
1171         *ip_list     = new_ip;
1172 }
1173
1174 static struct ctdb_public_ip_list *
1175 create_merged_ip_list(struct ctdb_context *ctdb)
1176 {
1177         int i, j;
1178         struct ctdb_public_ip_list *ip_list;
1179         struct ctdb_all_public_ips *public_ips;
1180
1181         if (ctdb->ip_tree != NULL) {
1182                 talloc_free(ctdb->ip_tree);
1183                 ctdb->ip_tree = NULL;
1184         }
1185         ctdb->ip_tree = trbt_create(ctdb, 0);
1186
1187         for (i=0;i<ctdb->num_nodes;i++) {
1188                 public_ips = ctdb->nodes[i]->known_public_ips;
1189
1190                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1191                         continue;
1192                 }
1193
1194                 /* there were no public ips for this node */
1195                 if (public_ips == NULL) {
1196                         continue;
1197                 }               
1198
1199                 for (j=0;j<public_ips->num;j++) {
1200                         struct ctdb_public_ip_list *tmp_ip; 
1201
1202                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1203                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1204                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1205                         tmp_ip->addr = public_ips->ips[j].addr;
1206                         tmp_ip->next = NULL;
1207
1208                         trbt_insertarray32_callback(ctdb->ip_tree,
1209                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1210                                 add_ip_callback,
1211                                 tmp_ip);
1212                 }
1213         }
1214
1215         ip_list = NULL;
1216         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1217
1218         return ip_list;
1219 }
1220
1221 /*
1222   make any IP alias changes for public addresses that are necessary 
1223  */
1224 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1225 {
1226         int i, num_healthy, retries;
1227         struct ctdb_public_ip ip;
1228         struct ctdb_public_ipv4 ipv4;
1229         uint32_t mask, *nodes;
1230         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1231         int maxnode, maxnum=0, minnode, minnum=0, num;
1232         TDB_DATA data;
1233         struct timeval timeout;
1234         struct client_async_data *async_data;
1235         struct ctdb_client_control_state *state;
1236         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1237
1238
1239         ZERO_STRUCT(ip);
1240
1241         /* Count how many completely healthy nodes we have */
1242         num_healthy = 0;
1243         for (i=0;i<nodemap->num;i++) {
1244                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1245                         num_healthy++;
1246                 }
1247         }
1248
1249         if (num_healthy > 0) {
1250                 /* We have healthy nodes, so only consider them for 
1251                    serving public addresses
1252                 */
1253                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1254         } else {
1255                 /* We didnt have any completely healthy nodes so
1256                    use "disabled" nodes as a fallback
1257                 */
1258                 mask = NODE_FLAGS_INACTIVE;
1259         }
1260
1261         /* since nodes only know about those public addresses that
1262            can be served by that particular node, no single node has
1263            a full list of all public addresses that exist in the cluster.
1264            Walk over all node structures and create a merged list of
1265            all public addresses that exist in the cluster.
1266
1267            keep the tree of ips around as ctdb->ip_tree
1268         */
1269         all_ips = create_merged_ip_list(ctdb);
1270
1271         /* If we want deterministic ip allocations, i.e. that the ip addresses
1272            will always be allocated the same way for a specific set of
1273            available/unavailable nodes.
1274         */
1275         if (1 == ctdb->tunable.deterministic_public_ips) {              
1276                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1277                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1278                         tmp_ip->pnn = i%nodemap->num;
1279                 }
1280         }
1281
1282
1283         /* mark all public addresses with a masked node as being served by
1284            node -1
1285         */
1286         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1287                 if (tmp_ip->pnn == -1) {
1288                         continue;
1289                 }
1290                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1291                         tmp_ip->pnn = -1;
1292                 }
1293         }
1294
1295         /* verify that the assigned nodes can serve that public ip
1296            and set it to -1 if not
1297         */
1298         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1299                 if (tmp_ip->pnn == -1) {
1300                         continue;
1301                 }
1302                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1303                         /* this node can not serve this ip. */
1304                         tmp_ip->pnn = -1;
1305                 }
1306         }
1307
1308
1309         /* now we must redistribute all public addresses with takeover node
1310            -1 among the nodes available
1311         */
1312         retries = 0;
1313 try_again:
1314         /* loop over all ip's and find a physical node to cover for 
1315            each unassigned ip.
1316         */
1317         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1318                 if (tmp_ip->pnn == -1) {
1319                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1320                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1321                                         ctdb_addr_to_str(&tmp_ip->addr)));
1322                         }
1323                 }
1324         }
1325
1326         /* If we dont want ips to fail back after a node becomes healthy
1327            again, we wont even try to reallocat the ip addresses so that
1328            they are evenly spread out.
1329            This can NOT be used at the same time as DeterministicIPs !
1330         */
1331         if (1 == ctdb->tunable.no_ip_failback) {
1332                 if (1 == ctdb->tunable.deterministic_public_ips) {
1333                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1334                 }
1335                 goto finished;
1336         }
1337
1338
1339         /* now, try to make sure the ip adresses are evenly distributed
1340            across the node.
1341            for each ip address, loop over all nodes that can serve this
1342            ip and make sure that the difference between the node
1343            serving the most and the node serving the least ip's are not greater
1344            than 1.
1345         */
1346         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1347                 if (tmp_ip->pnn == -1) {
1348                         continue;
1349                 }
1350
1351                 /* Get the highest and lowest number of ips's served by any 
1352                    valid node which can serve this ip.
1353                 */
1354                 maxnode = -1;
1355                 minnode = -1;
1356                 for (i=0;i<nodemap->num;i++) {
1357                         if (nodemap->nodes[i].flags & mask) {
1358                                 continue;
1359                         }
1360
1361                         /* only check nodes that can actually serve this ip */
1362                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1363                                 /* no it couldnt   so skip to the next node */
1364                                 continue;
1365                         }
1366
1367                         num = node_ip_coverage(ctdb, i, all_ips);
1368                         if (maxnode == -1) {
1369                                 maxnode = i;
1370                                 maxnum  = num;
1371                         } else {
1372                                 if (num > maxnum) {
1373                                         maxnode = i;
1374                                         maxnum  = num;
1375                                 }
1376                         }
1377                         if (minnode == -1) {
1378                                 minnode = i;
1379                                 minnum  = num;
1380                         } else {
1381                                 if (num < minnum) {
1382                                         minnode = i;
1383                                         minnum  = num;
1384                                 }
1385                         }
1386                 }
1387                 if (maxnode == -1) {
1388                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1389                                 ctdb_addr_to_str(&tmp_ip->addr)));
1390
1391                         continue;
1392                 }
1393
1394                 /* If we want deterministic IPs then dont try to reallocate 
1395                    them to spread out the load.
1396                 */
1397                 if (1 == ctdb->tunable.deterministic_public_ips) {
1398                         continue;
1399                 }
1400
1401                 /* if the spread between the smallest and largest coverage by
1402                    a node is >=2 we steal one of the ips from the node with
1403                    most coverage to even things out a bit.
1404                    try to do this at most 5 times  since we dont want to spend
1405                    too much time balancing the ip coverage.
1406                 */
1407                 if ( (maxnum > minnum+1)
1408                   && (retries < 5) ){
1409                         struct ctdb_public_ip_list *tmp;
1410
1411                         /* mark one of maxnode's vnn's as unassigned and try
1412                            again
1413                         */
1414                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1415                                 if (tmp->pnn == maxnode) {
1416                                         tmp->pnn = -1;
1417                                         retries++;
1418                                         goto try_again;
1419                                 }
1420                         }
1421                 }
1422         }
1423
1424
1425         /* finished distributing the public addresses, now just send the 
1426            info out to the nodes
1427         */
1428 finished:
1429
1430         /* at this point ->pnn is the node which will own each IP
1431            or -1 if there is no node that can cover this ip
1432         */
1433
1434         /* now tell all nodes to delete any alias that they should not
1435            have.  This will be a NOOP on nodes that don't currently
1436            hold the given alias */
1437         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1438         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1439
1440         for (i=0;i<nodemap->num;i++) {
1441                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1442                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1443                         continue;
1444                 }
1445
1446                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1447                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1448                                 /* This node should be serving this
1449                                    vnn so dont tell it to release the ip
1450                                 */
1451                                 continue;
1452                         }
1453                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1454                                 ipv4.pnn = tmp_ip->pnn;
1455                                 ipv4.sin = tmp_ip->addr.ip;
1456
1457                                 timeout = TAKEOVER_TIMEOUT();
1458                                 data.dsize = sizeof(ipv4);
1459                                 data.dptr  = (uint8_t *)&ipv4;
1460                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1461                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1462                                                 data, async_data,
1463                                                 &timeout, NULL);
1464                         } else {
1465                                 ip.pnn  = tmp_ip->pnn;
1466                                 ip.addr = tmp_ip->addr;
1467
1468                                 timeout = TAKEOVER_TIMEOUT();
1469                                 data.dsize = sizeof(ip);
1470                                 data.dptr  = (uint8_t *)&ip;
1471                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1472                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1473                                                 data, async_data,
1474                                                 &timeout, NULL);
1475                         }
1476
1477                         if (state == NULL) {
1478                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1479                                 talloc_free(tmp_ctx);
1480                                 return -1;
1481                         }
1482                 
1483                         ctdb_client_async_add(async_data, state);
1484                 }
1485         }
1486         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1487                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1488                 talloc_free(tmp_ctx);
1489                 return -1;
1490         }
1491         talloc_free(async_data);
1492
1493
1494         /* tell all nodes to get their own IPs */
1495         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1496         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1497         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1498                 if (tmp_ip->pnn == -1) {
1499                         /* this IP won't be taken over */
1500                         continue;
1501                 }
1502
1503                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1504                         ipv4.pnn = tmp_ip->pnn;
1505                         ipv4.sin = tmp_ip->addr.ip;
1506
1507                         timeout = TAKEOVER_TIMEOUT();
1508                         data.dsize = sizeof(ipv4);
1509                         data.dptr  = (uint8_t *)&ipv4;
1510                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1511                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1512                                         data, async_data,
1513                                         &timeout, NULL);
1514                 } else {
1515                         ip.pnn  = tmp_ip->pnn;
1516                         ip.addr = tmp_ip->addr;
1517
1518                         timeout = TAKEOVER_TIMEOUT();
1519                         data.dsize = sizeof(ip);
1520                         data.dptr  = (uint8_t *)&ip;
1521                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1522                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1523                                         data, async_data,
1524                                         &timeout, NULL);
1525                 }
1526                 if (state == NULL) {
1527                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1528                         talloc_free(tmp_ctx);
1529                         return -1;
1530                 }
1531                 
1532                 ctdb_client_async_add(async_data, state);
1533         }
1534         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1535                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1536                 talloc_free(tmp_ctx);
1537                 return -1;
1538         }
1539
1540         /* tell all nodes to update natwg */
1541         /* send the flags update natgw on all connected nodes */
1542         data.dptr  = discard_const("ipreallocated");
1543         data.dsize = strlen((char *)data.dptr) + 1; 
1544         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1545         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1546                                       nodes, 0, TAKEOVER_TIMEOUT(),
1547                                       false, data,
1548                                       NULL, NULL,
1549                                       NULL) != 0) {
1550                 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1551         }
1552
1553         talloc_free(tmp_ctx);
1554         return 0;
1555 }
1556
1557
1558 /*
1559   destroy a ctdb_client_ip structure
1560  */
1561 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1562 {
1563         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1564                 ctdb_addr_to_str(&ip->addr),
1565                 ntohs(ip->addr.ip.sin_port),
1566                 ip->client_id));
1567
1568         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1569         return 0;
1570 }
1571
1572 /*
1573   called by a client to inform us of a TCP connection that it is managing
1574   that should tickled with an ACK when IP takeover is done
1575   we handle both the old ipv4 style of packets as well as the new ipv4/6
1576   pdus.
1577  */
1578 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1579                                 TDB_DATA indata)
1580 {
1581         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1582         struct ctdb_control_tcp *old_addr = NULL;
1583         struct ctdb_control_tcp_addr new_addr;
1584         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1585         struct ctdb_tcp_list *tcp;
1586         struct ctdb_tcp_connection t;
1587         int ret;
1588         TDB_DATA data;
1589         struct ctdb_client_ip *ip;
1590         struct ctdb_vnn *vnn;
1591         ctdb_sock_addr addr;
1592
1593         switch (indata.dsize) {
1594         case sizeof(struct ctdb_control_tcp):
1595                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1596                 ZERO_STRUCT(new_addr);
1597                 tcp_sock = &new_addr;
1598                 tcp_sock->src.ip  = old_addr->src;
1599                 tcp_sock->dest.ip = old_addr->dest;
1600                 break;
1601         case sizeof(struct ctdb_control_tcp_addr):
1602                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1603                 break;
1604         default:
1605                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1606                                  "to ctdb_control_tcp_client. size was %d but "
1607                                  "only allowed sizes are %lu and %lu\n",
1608                                  (int)indata.dsize,
1609                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1610                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1611                 return -1;
1612         }
1613
1614         addr = tcp_sock->src;
1615         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1616         addr = tcp_sock->dest;
1617         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1618
1619         ZERO_STRUCT(addr);
1620         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1621         vnn = find_public_ip_vnn(ctdb, &addr);
1622         if (vnn == NULL) {
1623                 switch (addr.sa.sa_family) {
1624                 case AF_INET:
1625                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1626                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1627                                         ctdb_addr_to_str(&addr)));
1628                         }
1629                         break;
1630                 case AF_INET6:
1631                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1632                                 ctdb_addr_to_str(&addr)));
1633                         break;
1634                 default:
1635                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1636                 }
1637
1638                 return 0;
1639         }
1640
1641         if (vnn->pnn != ctdb->pnn) {
1642                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1643                         ctdb_addr_to_str(&addr),
1644                         client_id, client->pid));
1645                 /* failing this call will tell smbd to die */
1646                 return -1;
1647         }
1648
1649         ip = talloc(client, struct ctdb_client_ip);
1650         CTDB_NO_MEMORY(ctdb, ip);
1651
1652         ip->ctdb      = ctdb;
1653         ip->addr      = addr;
1654         ip->client_id = client_id;
1655         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1656         DLIST_ADD(ctdb->client_ip_list, ip);
1657
1658         tcp = talloc(client, struct ctdb_tcp_list);
1659         CTDB_NO_MEMORY(ctdb, tcp);
1660
1661         tcp->connection.src_addr = tcp_sock->src;
1662         tcp->connection.dst_addr = tcp_sock->dest;
1663
1664         DLIST_ADD(client->tcp_list, tcp);
1665
1666         t.src_addr = tcp_sock->src;
1667         t.dst_addr = tcp_sock->dest;
1668
1669         data.dptr = (uint8_t *)&t;
1670         data.dsize = sizeof(t);
1671
1672         switch (addr.sa.sa_family) {
1673         case AF_INET:
1674                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1675                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1676                         ctdb_addr_to_str(&tcp_sock->src),
1677                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1678                 break;
1679         case AF_INET6:
1680                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1681                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1682                         ctdb_addr_to_str(&tcp_sock->src),
1683                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1684                 break;
1685         default:
1686                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1687         }
1688
1689
1690         /* tell all nodes about this tcp connection */
1691         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1692                                        CTDB_CONTROL_TCP_ADD,
1693                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1694         if (ret != 0) {
1695                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1696                 return -1;
1697         }
1698
1699         return 0;
1700 }
1701
1702 /*
1703   find a tcp address on a list
1704  */
1705 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1706                                            struct ctdb_tcp_connection *tcp)
1707 {
1708         int i;
1709
1710         if (array == NULL) {
1711                 return NULL;
1712         }
1713
1714         for (i=0;i<array->num;i++) {
1715                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1716                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1717                         return &array->connections[i];
1718                 }
1719         }
1720         return NULL;
1721 }
1722
1723
1724
1725 /*
1726   called by a daemon to inform us of a TCP connection that one of its
1727   clients managing that should tickled with an ACK when IP takeover is
1728   done
1729  */
1730 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1731 {
1732         struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1733         struct ctdb_tcp_array *tcparray;
1734         struct ctdb_tcp_connection tcp;
1735         struct ctdb_vnn *vnn;
1736
1737         vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1738         if (vnn == NULL) {
1739                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1740                         ctdb_addr_to_str(&p->dst_addr)));
1741
1742                 return -1;
1743         }
1744
1745
1746         tcparray = vnn->tcp_array;
1747
1748         /* If this is the first tickle */
1749         if (tcparray == NULL) {
1750                 tcparray = talloc_size(ctdb->nodes, 
1751                         offsetof(struct ctdb_tcp_array, connections) +
1752                         sizeof(struct ctdb_tcp_connection) * 1);
1753                 CTDB_NO_MEMORY(ctdb, tcparray);
1754                 vnn->tcp_array = tcparray;
1755
1756                 tcparray->num = 0;
1757                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1758                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1759
1760                 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1761                 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1762                 tcparray->num++;
1763
1764                 if (tcp_update_needed) {
1765                         vnn->tcp_update_needed = true;
1766                 }
1767                 return 0;
1768         }
1769
1770
1771         /* Do we already have this tickle ?*/
1772         tcp.src_addr = p->src_addr;
1773         tcp.dst_addr = p->dst_addr;
1774         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1775                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1776                         ctdb_addr_to_str(&tcp.dst_addr),
1777                         ntohs(tcp.dst_addr.ip.sin_port),
1778                         vnn->pnn));
1779                 return 0;
1780         }
1781
1782         /* A new tickle, we must add it to the array */
1783         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1784                                         struct ctdb_tcp_connection,
1785                                         tcparray->num+1);
1786         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1787
1788         vnn->tcp_array = tcparray;
1789         tcparray->connections[tcparray->num].src_addr = p->src_addr;
1790         tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1791         tcparray->num++;
1792                                 
1793         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1794                 ctdb_addr_to_str(&tcp.dst_addr),
1795                 ntohs(tcp.dst_addr.ip.sin_port),
1796                 vnn->pnn));
1797
1798         if (tcp_update_needed) {
1799                 vnn->tcp_update_needed = true;
1800         }
1801
1802         return 0;
1803 }
1804
1805
1806 /*
1807   called by a daemon to inform us of a TCP connection that one of its
1808   clients managing that should tickled with an ACK when IP takeover is
1809   done
1810  */
1811 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1812 {
1813         struct ctdb_tcp_connection *tcpp;
1814         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1815
1816         if (vnn == NULL) {
1817                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1818                         ctdb_addr_to_str(&conn->dst_addr)));
1819                 return;
1820         }
1821
1822         /* if the array is empty we cant remove it
1823            and we dont need to do anything
1824          */
1825         if (vnn->tcp_array == NULL) {
1826                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1827                         ctdb_addr_to_str(&conn->dst_addr),
1828                         ntohs(conn->dst_addr.ip.sin_port)));
1829                 return;
1830         }
1831
1832
1833         /* See if we know this connection
1834            if we dont know this connection  then we dont need to do anything
1835          */
1836         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1837         if (tcpp == NULL) {
1838                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1839                         ctdb_addr_to_str(&conn->dst_addr),
1840                         ntohs(conn->dst_addr.ip.sin_port)));
1841                 return;
1842         }
1843
1844
1845         /* We need to remove this entry from the array.
1846            Instead of allocating a new array and copying data to it
1847            we cheat and just copy the last entry in the existing array
1848            to the entry that is to be removed and just shring the 
1849            ->num field
1850          */
1851         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1852         vnn->tcp_array->num--;
1853
1854         /* If we deleted the last entry we also need to remove the entire array
1855          */
1856         if (vnn->tcp_array->num == 0) {
1857                 talloc_free(vnn->tcp_array);
1858                 vnn->tcp_array = NULL;
1859         }               
1860
1861         vnn->tcp_update_needed = true;
1862
1863         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1864                 ctdb_addr_to_str(&conn->src_addr),
1865                 ntohs(conn->src_addr.ip.sin_port)));
1866 }
1867
1868
1869 /*
1870   called by a daemon to inform us of a TCP connection that one of its
1871   clients used are no longer needed in the tickle database
1872  */
1873 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1874 {
1875         struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1876
1877         ctdb_remove_tcp_connection(ctdb, conn);
1878
1879         return 0;
1880 }
1881
1882
/*
  called when a daemon restarts - the intention is to send all tickles
  for all public addresses we are serving to the new node immediately.
  currently a placeholder: nothing is sent and 0 is always returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        (void)ctdb;
        (void)vnn;

        return 0;
}
1892
1893
1894 /*
1895   called when a client structure goes away - hook to remove
1896   elements from the tcp_list in all daemons
1897  */
1898 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1899 {
1900         while (client->tcp_list) {
1901                 struct ctdb_tcp_list *tcp = client->tcp_list;
1902                 DLIST_REMOVE(client->tcp_list, tcp);
1903                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1904         }
1905 }
1906
1907
1908 /*
1909   release all IPs on shutdown
1910  */
1911 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1912 {
1913         struct ctdb_vnn *vnn;
1914
1915         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1916                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1917                         ctdb_vnn_unassign_iface(ctdb, vnn);
1918                         continue;
1919                 }
1920                 if (!vnn->iface) {
1921                         continue;
1922                 }
1923                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1924                                   ctdb_vnn_iface_string(vnn),
1925                                   ctdb_addr_to_str(&vnn->public_address),
1926                                   vnn->public_netmask_bits);
1927                 release_kill_clients(ctdb, &vnn->public_address);
1928                 ctdb_vnn_unassign_iface(ctdb, vnn);
1929         }
1930 }
1931
1932
1933 /*
1934   get list of public IPs
1935  */
1936 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1937                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1938 {
1939         int i, num, len;
1940         struct ctdb_all_public_ips *ips;
1941         struct ctdb_vnn *vnn;
1942         bool only_available = false;
1943
1944         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1945                 only_available = true;
1946         }
1947
1948         /* count how many public ip structures we have */
1949         num = 0;
1950         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1951                 num++;
1952         }
1953
1954         len = offsetof(struct ctdb_all_public_ips, ips) + 
1955                 num*sizeof(struct ctdb_public_ip);
1956         ips = talloc_zero_size(outdata, len);
1957         CTDB_NO_MEMORY(ctdb, ips);
1958
1959         i = 0;
1960         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1961                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1962                         continue;
1963                 }
1964                 ips->ips[i].pnn  = vnn->pnn;
1965                 ips->ips[i].addr = vnn->public_address;
1966                 i++;
1967         }
1968         ips->num = i;
1969         len = offsetof(struct ctdb_all_public_ips, ips) +
1970                 i*sizeof(struct ctdb_public_ip);
1971
1972         outdata->dsize = len;
1973         outdata->dptr  = (uint8_t *)ips;
1974
1975         return 0;
1976 }
1977
1978
1979 /*
1980   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1981  */
1982 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1983                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1984 {
1985         int i, num, len;
1986         struct ctdb_all_public_ipsv4 *ips;
1987         struct ctdb_vnn *vnn;
1988
1989         /* count how many public ip structures we have */
1990         num = 0;
1991         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1992                 if (vnn->public_address.sa.sa_family != AF_INET) {
1993                         continue;
1994                 }
1995                 num++;
1996         }
1997
1998         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1999                 num*sizeof(struct ctdb_public_ipv4);
2000         ips = talloc_zero_size(outdata, len);
2001         CTDB_NO_MEMORY(ctdb, ips);
2002
2003         outdata->dsize = len;
2004         outdata->dptr  = (uint8_t *)ips;
2005
2006         ips->num = num;
2007         i = 0;
2008         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2009                 if (vnn->public_address.sa.sa_family != AF_INET) {
2010                         continue;
2011                 }
2012                 ips->ips[i].pnn = vnn->pnn;
2013                 ips->ips[i].sin = vnn->public_address.ip;
2014                 i++;
2015         }
2016
2017         return 0;
2018 }
2019
2020 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2021                                         struct ctdb_req_control *c,
2022                                         TDB_DATA indata,
2023                                         TDB_DATA *outdata)
2024 {
2025         int i, num, len;
2026         ctdb_sock_addr *addr;
2027         struct ctdb_control_public_ip_info *info;
2028         struct ctdb_vnn *vnn;
2029
2030         addr = (ctdb_sock_addr *)indata.dptr;
2031
2032         vnn = find_public_ip_vnn(ctdb, addr);
2033         if (vnn == NULL) {
2034                 /* if it is not a public ip   it could be our 'single ip' */
2035                 if (ctdb->single_ip_vnn) {
2036                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2037                                 vnn = ctdb->single_ip_vnn;
2038                         }
2039                 }
2040         }
2041         if (vnn == NULL) {
2042                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2043                                  "'%s'not a public address\n",
2044                                  ctdb_addr_to_str(addr)));
2045                 return -1;
2046         }
2047
2048         /* count how many public ip structures we have */
2049         num = 0;
2050         for (;vnn->ifaces[num];) {
2051                 num++;
2052         }
2053
2054         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2055                 num*sizeof(struct ctdb_control_iface_info);
2056         info = talloc_zero_size(outdata, len);
2057         CTDB_NO_MEMORY(ctdb, info);
2058
2059         info->ip.addr = vnn->public_address;
2060         info->ip.pnn = vnn->pnn;
2061         info->active_idx = 0xFFFFFFFF;
2062
2063         for (i=0; vnn->ifaces[i]; i++) {
2064                 struct ctdb_iface *cur;
2065
2066                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2067                 if (cur == NULL) {
2068                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2069                                            vnn->ifaces[i]));
2070                         return -1;
2071                 }
2072                 if (vnn->iface == cur) {
2073                         info->active_idx = i;
2074                 }
2075                 strcpy(info->ifaces[i].name, cur->name);
2076                 info->ifaces[i].link_state = cur->link_up;
2077                 info->ifaces[i].references = cur->references;
2078         }
2079         info->num = i;
2080         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2081                 i*sizeof(struct ctdb_control_iface_info);
2082
2083         outdata->dsize = len;
2084         outdata->dptr  = (uint8_t *)info;
2085
2086         return 0;
2087 }
2088
2089 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2090                                 struct ctdb_req_control *c,
2091                                 TDB_DATA *outdata)
2092 {
2093         int i, num, len;
2094         struct ctdb_control_get_ifaces *ifaces;
2095         struct ctdb_iface *cur;
2096
2097         /* count how many public ip structures we have */
2098         num = 0;
2099         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2100                 num++;
2101         }
2102
2103         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2104                 num*sizeof(struct ctdb_control_iface_info);
2105         ifaces = talloc_zero_size(outdata, len);
2106         CTDB_NO_MEMORY(ctdb, ifaces);
2107
2108         i = 0;
2109         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2110                 strcpy(ifaces->ifaces[i].name, cur->name);
2111                 ifaces->ifaces[i].link_state = cur->link_up;
2112                 ifaces->ifaces[i].references = cur->references;
2113                 i++;
2114         }
2115         ifaces->num = i;
2116         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2117                 i*sizeof(struct ctdb_control_iface_info);
2118
2119         outdata->dsize = len;
2120         outdata->dptr  = (uint8_t *)ifaces;
2121
2122         return 0;
2123 }
2124
2125 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2126                                     struct ctdb_req_control *c,
2127                                     TDB_DATA indata)
2128 {
2129         struct ctdb_control_iface_info *info;
2130         struct ctdb_iface *iface;
2131         bool link_up = false;
2132
2133         info = (struct ctdb_control_iface_info *)indata.dptr;
2134
2135         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2136                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2137                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2138                                   len, len, info->name));
2139                 return -1;
2140         }
2141
2142         switch (info->link_state) {
2143         case 0:
2144                 link_up = false;
2145                 break;
2146         case 1:
2147                 link_up = true;
2148                 break;
2149         default:
2150                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2151                                   (unsigned int)info->link_state));
2152                 return -1;
2153         }
2154
2155         if (info->references != 0) {
2156                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2157                                   (unsigned int)info->references));
2158                 return -1;
2159         }
2160
2161         iface = ctdb_find_iface(ctdb, info->name);
2162         if (iface == NULL) {
2163                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2164                                   info->name));
2165                 return -1;
2166         }
2167
2168         if (link_up == iface->link_up) {
2169                 return 0;
2170         }
2171
2172         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2173               ("iface[%s] has changed it's link status %s => %s\n",
2174                iface->name,
2175                iface->link_up?"up":"down",
2176                link_up?"up":"down"));
2177
2178         iface->link_up = link_up;
2179         return 0;
2180 }
2181
2182
2183 /* 
2184    structure containing the listening socket and the list of tcp connections
2185    that the ctdb daemon is to kill
2186 */
2187 struct ctdb_kill_tcp {
2188         struct ctdb_vnn *vnn;
2189         struct ctdb_context *ctdb;
2190         int capture_fd;
2191         struct fd_event *fde;
2192         trbt_tree_t *connections;
2193         void *private_data;
2194 };
2195
2196 /*
2197   a tcp connection that is to be killed
2198  */
2199 struct ctdb_killtcp_con {
2200         ctdb_sock_addr src_addr;
2201         ctdb_sock_addr dst_addr;
2202         int count;
2203         struct ctdb_kill_tcp *killtcp;
2204 };
2205
2206 /* this function is used to create a key to represent this socketpair
2207    in the killtcp tree.
2208    this key is used to insert and lookup matching socketpairs that are
2209    to be tickled and RST
2210 */
2211 #define KILLTCP_KEYLEN  10
2212 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2213 {
2214         static uint32_t key[KILLTCP_KEYLEN];
2215
2216         bzero(key, sizeof(key));
2217
2218         if (src->sa.sa_family != dst->sa.sa_family) {
2219                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2220                 return key;
2221         }
2222         
2223         switch (src->sa.sa_family) {
2224         case AF_INET:
2225                 key[0]  = dst->ip.sin_addr.s_addr;
2226                 key[1]  = src->ip.sin_addr.s_addr;
2227                 key[2]  = dst->ip.sin_port;
2228                 key[3]  = src->ip.sin_port;
2229                 break;
2230         case AF_INET6:
2231                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2232                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2233                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2234                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2235                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2236                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2237                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2238                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2239                 key[8]  = dst->ip6.sin6_port;
2240                 key[9]  = src->ip6.sin6_port;
2241                 break;
2242         default:
2243                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2244                 return key;
2245         }
2246
2247         return key;
2248 }
2249
2250 /*
2251   called when we get a read event on the raw socket
2252  */
2253 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2254                                 uint16_t flags, void *private_data)
2255 {
2256         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2257         struct ctdb_killtcp_con *con;
2258         ctdb_sock_addr src, dst;
2259         uint32_t ack_seq, seq;
2260
2261         if (!(flags & EVENT_FD_READ)) {
2262                 return;
2263         }
2264
2265         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2266                                 killtcp->private_data,
2267                                 &src, &dst,
2268                                 &ack_seq, &seq) != 0) {
2269                 /* probably a non-tcp ACK packet */
2270                 return;
2271         }
2272
2273         /* check if we have this guy in our list of connections
2274            to kill
2275         */
2276         con = trbt_lookuparray32(killtcp->connections, 
2277                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2278         if (con == NULL) {
2279                 /* no this was some other packet we can just ignore */
2280                 return;
2281         }
2282
2283         /* This one has been tickled !
2284            now reset him and remove him from the list.
2285          */
2286         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2287                 ntohs(con->dst_addr.ip.sin_port),
2288                 ctdb_addr_to_str(&con->src_addr),
2289                 ntohs(con->src_addr.ip.sin_port)));
2290
2291         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2292         talloc_free(con);
2293 }
2294
2295
2296 /* when traversing the list of all tcp connections to send tickle acks to
2297    (so that we can capture the ack coming back and kill the connection
2298     by a RST)
2299    this callback is called for each connection we are currently trying to kill
2300 */
2301 static void tickle_connection_traverse(void *param, void *data)
2302 {
2303         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2304
2305         /* have tried too many times, just give up */
2306         if (con->count >= 5) {
2307                 /* can't delete in traverse: reparent to delete_cons */
2308                 talloc_steal(param, con);
2309                 return;
2310         }
2311
2312         /* othervise, try tickling it again */
2313         con->count++;
2314         ctdb_sys_send_tcp(
2315                 (ctdb_sock_addr *)&con->dst_addr,
2316                 (ctdb_sock_addr *)&con->src_addr,
2317                 0, 0, 0);
2318 }
2319
2320
2321 /* 
2322    called every second until all sentenced connections have been reset
2323  */
2324 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2325                                               struct timeval t, void *private_data)
2326 {
2327         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2328         void *delete_cons = talloc_new(NULL);
2329
2330         /* loop over all connections sending tickle ACKs */
2331         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2332
2333         /* now we've finished traverse, it's safe to do deletion. */
2334         talloc_free(delete_cons);
2335
2336         /* If there are no more connections to kill we can remove the
2337            entire killtcp structure
2338          */
2339         if ( (killtcp->connections == NULL) || 
2340              (killtcp->connections->root == NULL) ) {
2341                 talloc_free(killtcp);
2342                 return;
2343         }
2344
2345         /* try tickling them again in a seconds time
2346          */
2347         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2348                         ctdb_tickle_sentenced_connections, killtcp);
2349 }
2350
2351 /*
2352   destroy the killtcp structure
2353  */
2354 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2355 {
2356         if (killtcp->vnn) {
2357                 killtcp->vnn->killtcp = NULL;
2358         }
2359         return 0;
2360 }
2361
2362
/* insert callback for the killtcp tree: whatever was stored under the
   key before is unconditionally replaced with the new connection
   structure.

   parm is the new value, data the value currently stored (unused).
   the old value is deliberately not freed here: it is talloc_stolen by
   the same node in the tree and will be deleted when the new data is
   deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
2374
2375 /*
2376   add a tcp socket to the list of connections we want to RST
2377  */
2378 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2379                                        ctdb_sock_addr *s,
2380                                        ctdb_sock_addr *d)
2381 {
2382         ctdb_sock_addr src, dst;
2383         struct ctdb_kill_tcp *killtcp;
2384         struct ctdb_killtcp_con *con;
2385         struct ctdb_vnn *vnn;
2386
2387         ctdb_canonicalize_ip(s, &src);
2388         ctdb_canonicalize_ip(d, &dst);
2389
2390         vnn = find_public_ip_vnn(ctdb, &dst);
2391         if (vnn == NULL) {
2392                 vnn = find_public_ip_vnn(ctdb, &src);
2393         }
2394         if (vnn == NULL) {
2395                 /* if it is not a public ip   it could be our 'single ip' */
2396                 if (ctdb->single_ip_vnn) {
2397                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2398                                 vnn = ctdb->single_ip_vnn;
2399                         }
2400                 }
2401         }
2402         if (vnn == NULL) {
2403                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2404                 return -1;
2405         }
2406
2407         killtcp = vnn->killtcp;
2408         
2409         /* If this is the first connection to kill we must allocate
2410            a new structure
2411          */
2412         if (killtcp == NULL) {
2413                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2414                 CTDB_NO_MEMORY(ctdb, killtcp);
2415
2416                 killtcp->vnn         = vnn;
2417                 killtcp->ctdb        = ctdb;
2418                 killtcp->capture_fd  = -1;
2419                 killtcp->connections = trbt_create(killtcp, 0);
2420
2421                 vnn->killtcp         = killtcp;
2422                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2423         }
2424
2425
2426
2427         /* create a structure that describes this connection we want to
2428            RST and store it in killtcp->connections
2429         */
2430         con = talloc(killtcp, struct ctdb_killtcp_con);
2431         CTDB_NO_MEMORY(ctdb, con);
2432         con->src_addr = src;
2433         con->dst_addr = dst;
2434         con->count    = 0;
2435         con->killtcp  = killtcp;
2436
2437
2438         trbt_insertarray32_callback(killtcp->connections,
2439                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2440                         add_killtcp_callback, con);
2441
2442         /* 
2443            If we dont have a socket to listen on yet we must create it
2444          */
2445         if (killtcp->capture_fd == -1) {
2446                 const char *iface = ctdb_vnn_iface_string(vnn);
2447                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2448                 if (killtcp->capture_fd == -1) {
2449                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2450                                           "socket on iface '%s' for killtcp (%s)\n",
2451                                           iface, strerror(errno)));
2452                         goto failed;
2453                 }
2454         }
2455
2456
2457         if (killtcp->fde == NULL) {
2458                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2459                                             EVENT_FD_READ,
2460                                             capture_tcp_handler, killtcp);
2461                 tevent_fd_set_auto_close(killtcp->fde);
2462
2463                 /* We also need to set up some events to tickle all these connections
2464                    until they are all reset
2465                 */
2466                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2467                                 ctdb_tickle_sentenced_connections, killtcp);
2468         }
2469
2470         /* tickle him once now */
2471         ctdb_sys_send_tcp(
2472                 &con->dst_addr,
2473                 &con->src_addr,
2474                 0, 0, 0);
2475
2476         return 0;
2477
2478 failed:
2479         talloc_free(vnn->killtcp);
2480         vnn->killtcp = NULL;
2481         return -1;
2482 }
2483
2484 /*
2485   kill a TCP connection.
2486  */
2487 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2488 {
2489         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2490
2491         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2492 }
2493
2494 /*
2495   called by a daemon to inform us of the entire list of TCP tickles for
2496   a particular public address.
2497   this control should only be sent by the node that is currently serving
2498   that public address.
2499  */
2500 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2501 {
2502         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2503         struct ctdb_tcp_array *tcparray;
2504         struct ctdb_vnn *vnn;
2505
2506         /* We must at least have tickles.num or else we cant verify the size
2507            of the received data blob
2508          */
2509         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2510                                         tickles.connections)) {
2511                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2512                 return -1;
2513         }
2514
2515         /* verify that the size of data matches what we expect */
2516         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2517                                 tickles.connections)
2518                          + sizeof(struct ctdb_tcp_connection)
2519                                  * list->tickles.num) {
2520                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2521                 return -1;
2522         }       
2523
2524         vnn = find_public_ip_vnn(ctdb, &list->addr);
2525         if (vnn == NULL) {
2526                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2527                         ctdb_addr_to_str(&list->addr)));
2528
2529                 return 1;
2530         }
2531
2532         /* remove any old ticklelist we might have */
2533         talloc_free(vnn->tcp_array);
2534         vnn->tcp_array = NULL;
2535
2536         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2537         CTDB_NO_MEMORY(ctdb, tcparray);
2538
2539         tcparray->num = list->tickles.num;
2540
2541         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2542         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2543
2544         memcpy(tcparray->connections, &list->tickles.connections[0], 
2545                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2546
2547         /* We now have a new fresh tickle list array for this vnn */
2548         vnn->tcp_array = talloc_steal(vnn, tcparray);
2549         
2550         return 0;
2551 }
2552
2553 /*
2554   called to return the full list of tickles for the puclic address associated 
2555   with the provided vnn
2556  */
2557 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2558 {
2559         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2560         struct ctdb_control_tcp_tickle_list *list;
2561         struct ctdb_tcp_array *tcparray;
2562         int num;
2563         struct ctdb_vnn *vnn;
2564
2565         vnn = find_public_ip_vnn(ctdb, addr);
2566         if (vnn == NULL) {
2567                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2568                         ctdb_addr_to_str(addr)));
2569
2570                 return 1;
2571         }
2572
2573         tcparray = vnn->tcp_array;
2574         if (tcparray) {
2575                 num = tcparray->num;
2576         } else {
2577                 num = 0;
2578         }
2579
2580         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2581                                 tickles.connections)
2582                         + sizeof(struct ctdb_tcp_connection) * num;
2583
2584         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2585         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2586         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2587
2588         list->addr = *addr;
2589         list->tickles.num = num;
2590         if (num) {
2591                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2592                         sizeof(struct ctdb_tcp_connection) * num);
2593         }
2594
2595         return 0;
2596 }
2597
2598
2599 /*
2600   set the list of all tcp tickles for a public address
2601  */
2602 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2603                               struct timeval timeout, uint32_t destnode, 
2604                               ctdb_sock_addr *addr,
2605                               struct ctdb_tcp_array *tcparray)
2606 {
2607         int ret, num;
2608         TDB_DATA data;
2609         struct ctdb_control_tcp_tickle_list *list;
2610
2611         if (tcparray) {
2612                 num = tcparray->num;
2613         } else {
2614                 num = 0;
2615         }
2616
2617         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2618                                 tickles.connections) +
2619                         sizeof(struct ctdb_tcp_connection) * num;
2620         data.dptr = talloc_size(ctdb, data.dsize);
2621         CTDB_NO_MEMORY(ctdb, data.dptr);
2622
2623         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2624         list->addr = *addr;
2625         list->tickles.num = num;
2626         if (tcparray) {
2627                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2628         }
2629
2630         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2631                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2632                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2633         if (ret != 0) {
2634                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2635                 return -1;
2636         }
2637
2638         talloc_free(data.dptr);
2639
2640         return ret;
2641 }
2642
2643
2644 /*
2645   perform tickle updates if required
2646  */
2647 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2648                                 struct timed_event *te, 
2649                                 struct timeval t, void *private_data)
2650 {
2651         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2652         int ret;
2653         struct ctdb_vnn *vnn;
2654
2655         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2656                 /* we only send out updates for public addresses that 
2657                    we have taken over
2658                  */
2659                 if (ctdb->pnn != vnn->pnn) {
2660                         continue;
2661                 }
2662                 /* We only send out the updates if we need to */
2663                 if (!vnn->tcp_update_needed) {
2664                         continue;
2665                 }
2666                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2667                                 TAKEOVER_TIMEOUT(),
2668                                 CTDB_BROADCAST_CONNECTED,
2669                                 &vnn->public_address,
2670                                 vnn->tcp_array);
2671                 if (ret != 0) {
2672                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2673                                 ctdb_addr_to_str(&vnn->public_address)));
2674                 }
2675         }
2676
2677         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2678                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2679                              ctdb_update_tcp_tickles, ctdb);
2680 }               
2681         
2682
2683 /*
2684   start periodic update of tcp tickles
2685  */
2686 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2687 {
2688         ctdb->tickle_update_context = talloc_new(ctdb);
2689
2690         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2691                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2692                              ctdb_update_tcp_tickles, ctdb);
2693 }
2694
2695
2696
2697
2698 struct control_gratious_arp {
2699         struct ctdb_context *ctdb;
2700         ctdb_sock_addr addr;
2701         const char *iface;
2702         int count;
2703 };
2704
2705 /*
2706   send a control_gratuitous arp
2707  */
2708 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2709                                   struct timeval t, void *private_data)
2710 {
2711         int ret;
2712         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2713                                                         struct control_gratious_arp);
2714
2715         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2716         if (ret != 0) {
2717                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2718                                  arp->iface, strerror(errno)));
2719         }
2720
2721
2722         arp->count++;
2723         if (arp->count == CTDB_ARP_REPEAT) {
2724                 talloc_free(arp);
2725                 return;
2726         }
2727
2728         event_add_timed(arp->ctdb->ev, arp, 
2729                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2730                         send_gratious_arp, arp);
2731 }
2732
2733
2734 /*
2735   send a gratious arp 
2736  */
2737 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2738 {
2739         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2740         struct control_gratious_arp *arp;
2741
2742         /* verify the size of indata */
2743         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2744                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2745                                  (unsigned)indata.dsize, 
2746                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2747                 return -1;
2748         }
2749         if (indata.dsize != 
2750                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2751                 + gratious_arp->len ) ){
2752
2753                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2754                         "but should be %u bytes\n", 
2755                          (unsigned)indata.dsize, 
2756                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2757                 return -1;
2758         }
2759
2760
2761         arp = talloc(ctdb, struct control_gratious_arp);
2762         CTDB_NO_MEMORY(ctdb, arp);
2763
2764         arp->ctdb  = ctdb;
2765         arp->addr   = gratious_arp->addr;
2766         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2767         CTDB_NO_MEMORY(ctdb, arp->iface);
2768         arp->count = 0;
2769         
2770         event_add_timed(arp->ctdb->ev, arp, 
2771                         timeval_zero(), send_gratious_arp, arp);
2772
2773         return 0;
2774 }
2775
2776 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2777 {
2778         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2779         int ret;
2780
2781         /* verify the size of indata */
2782         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2783                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2784                 return -1;
2785         }
2786         if (indata.dsize != 
2787                 ( offsetof(struct ctdb_control_ip_iface, iface)
2788                 + pub->len ) ){
2789
2790                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2791                         "but should be %u bytes\n", 
2792                          (unsigned)indata.dsize, 
2793                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2794                 return -1;
2795         }
2796
2797         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2798
2799         if (ret != 0) {
2800                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2801                 return -1;
2802         }
2803
2804         return 0;
2805 }
2806
/*
  called when releaseip event finishes for del_public_address

  private_data is the talloc context that was created for the event;
  all that is left to do is to free it. status is not looked at.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status,
                                void *private_data)
{
        (void)ctdb;
        (void)status;

        talloc_free(private_data);
}
2815
2816 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2817 {
2818         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2819         struct ctdb_vnn *vnn;
2820         int ret;
2821
2822         /* verify the size of indata */
2823         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2824                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2825                 return -1;
2826         }
2827         if (indata.dsize != 
2828                 ( offsetof(struct ctdb_control_ip_iface, iface)
2829                 + pub->len ) ){
2830
2831                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2832                         "but should be %u bytes\n", 
2833                          (unsigned)indata.dsize, 
2834                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2835                 return -1;
2836         }
2837
2838         /* walk over all public addresses until we find a match */
2839         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2840                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2841                         TALLOC_CTX *mem_ctx;
2842
2843                         DLIST_REMOVE(ctdb->vnn, vnn);
2844                         if (vnn->iface == NULL) {
2845                                 talloc_free(vnn);
2846                                 return 0;
2847                         }
2848
2849                         mem_ctx = talloc_new(ctdb);
2850                         ret = ctdb_event_script_callback(ctdb, 
2851                                          mem_ctx, delete_ip_callback, mem_ctx,
2852                                          false,
2853                                          CTDB_EVENT_RELEASE_IP,
2854                                          "%s %s %u",
2855                                          ctdb_vnn_iface_string(vnn),
2856                                          ctdb_addr_to_str(&vnn->public_address),
2857                                          vnn->public_netmask_bits);
2858                         ctdb_vnn_unassign_iface(ctdb, vnn);
2859                         talloc_free(vnn);
2860                         if (ret != 0) {
2861                                 return -1;
2862                         }
2863                         return 0;
2864                 }
2865         }
2866
2867         return -1;
2868 }
2869
2870 /* This function is called from the recovery daemon to verify that a remote
2871    node has the expected ip allocation.
2872    This is verified against ctdb->ip_tree
2873 */
2874 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2875 {
2876         struct ctdb_public_ip_list *tmp_ip; 
2877         int i;
2878
2879         if (ctdb->ip_tree == NULL) {
2880                 /* dont know the expected allocation yet, assume remote node
2881                    is correct. */
2882                 return 0;
2883         }
2884
2885         if (ips == NULL) {
2886                 return 0;
2887         }
2888
2889         for (i=0; i<ips->num; i++) {
2890                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2891                 if (tmp_ip == NULL) {
2892                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2893                         return -1;
2894                 }
2895
2896                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2897                         continue;
2898                 }
2899
2900                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2901                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2902                         return -1;
2903                 }
2904         }
2905
2906         return 0;
2907 }
2908
2909 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2910 {
2911         struct ctdb_public_ip_list *tmp_ip; 
2912
2913         if (ctdb->ip_tree == NULL) {
2914                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2915                 return -1;
2916         }
2917
2918         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2919         if (tmp_ip == NULL) {
2920                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2921                 return -1;
2922         }
2923
2924         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2925         tmp_ip->pnn = ip->pnn;
2926
2927         return 0;
2928 }