remove the timeout parameter to ctdb_control_send() and
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "include/ctdb_protocol.h"
28 #include "include/ctdb_private.h"
29 #include "common/rb_tree.h"
30
31
/*
  timeval built by timeval_current_ofs() from the takeover_timeout tunable.
  Expands to a reference to a variable named 'ctdb', which must exist in
  the scope where the macro is used.
 */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout, 0)

/* seconds argument given to timeval_current_ofs() when re-arming the
   gratuitous arp timer */
#define CTDB_ARP_INTERVAL 1
/* the arp timer stops re-arming once this many rounds have been sent */
#define CTDB_ARP_REPEAT   3
36
/*
  one local network interface known to ctdb; entries are linked on
  ctdb->ifaces
 */
struct ctdb_iface {
	struct ctdb_iface *prev;
	struct ctdb_iface *next;
	/* interface name, matched with strcmp() during lookups */
	const char *name;
	/* interfaces with link_up == false are skipped when picking one */
	bool link_up;
	/* incremented/decremented as public addresses are (un)assigned */
	uint32_t references;
};
43
44 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
45 {
46         if (vnn->iface) {
47                 return vnn->iface->name;
48         }
49
50         return "__none__";
51 }
52
53 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
54 {
55         struct ctdb_iface *i;
56
57         /* Verify that we dont have an entry for this ip yet */
58         for (i=ctdb->ifaces;i;i=i->next) {
59                 if (strcmp(i->name, iface) == 0) {
60                         return 0;
61                 }
62         }
63
64         /* create a new structure for this interface */
65         i = talloc_zero(ctdb, struct ctdb_iface);
66         CTDB_NO_MEMORY_FATAL(ctdb, i);
67         i->name = talloc_strdup(i, iface);
68         CTDB_NO_MEMORY(ctdb, i->name);
69         i->link_up = false;
70
71         DLIST_ADD(ctdb->ifaces, i);
72
73         return 0;
74 }
75
76 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
77                                           const char *iface)
78 {
79         struct ctdb_iface *i;
80
81         /* Verify that we dont have an entry for this ip yet */
82         for (i=ctdb->ifaces;i;i=i->next) {
83                 if (strcmp(i->name, iface) == 0) {
84                         return i;
85                 }
86         }
87
88         return NULL;
89 }
90
91 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
92                                               struct ctdb_vnn *vnn)
93 {
94         int i;
95         struct ctdb_iface *cur = NULL;
96         struct ctdb_iface *best = NULL;
97
98         for (i=0; vnn->ifaces[i]; i++) {
99
100                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
101                 if (cur == NULL) {
102                         continue;
103                 }
104
105                 if (!cur->link_up) {
106                         continue;
107                 }
108
109                 if (best == NULL) {
110                         best = cur;
111                         continue;
112                 }
113
114                 if (cur->references < best->references) {
115                         best = cur;
116                         continue;
117                 }
118         }
119
120         return best;
121 }
122
123 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
124                                      struct ctdb_vnn *vnn)
125 {
126         struct ctdb_iface *best = NULL;
127
128         if (vnn->iface) {
129                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
130                                    "still assigned to iface '%s'\n",
131                                    ctdb_addr_to_str(&vnn->public_address),
132                                    ctdb_vnn_iface_string(vnn)));
133                 return 0;
134         }
135
136         best = ctdb_vnn_best_iface(ctdb, vnn);
137         if (best == NULL) {
138                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
139                                   "cannot assign to iface any iface\n",
140                                   ctdb_addr_to_str(&vnn->public_address)));
141                 return -1;
142         }
143
144         vnn->iface = best;
145         best->references++;
146         vnn->pnn = ctdb->pnn;
147
148         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
149                            "now assigned to iface '%s' refs[%d]\n",
150                            ctdb_addr_to_str(&vnn->public_address),
151                            ctdb_vnn_iface_string(vnn),
152                            best->references));
153         return 0;
154 }
155
156 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
157                                     struct ctdb_vnn *vnn)
158 {
159         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
160                            "now unassigned (old iface '%s' refs[%d])\n",
161                            ctdb_addr_to_str(&vnn->public_address),
162                            ctdb_vnn_iface_string(vnn),
163                            vnn->iface?vnn->iface->references:0));
164         if (vnn->iface) {
165                 vnn->iface->references--;
166         }
167         vnn->iface = NULL;
168         if (vnn->pnn == ctdb->pnn) {
169                 vnn->pnn = -1;
170         }
171 }
172
173 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
174                                struct ctdb_vnn *vnn)
175 {
176         int i;
177
178         if (vnn->iface && vnn->iface->link_up) {
179                 return true;
180         }
181
182         for (i=0; vnn->ifaces[i]; i++) {
183                 struct ctdb_iface *cur;
184
185                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
186                 if (cur == NULL) {
187                         continue;
188                 }
189
190                 if (cur->link_up) {
191                         return true;
192                 }
193         }
194
195         return false;
196 }
197
198 struct ctdb_takeover_arp {
199         struct ctdb_context *ctdb;
200         uint32_t count;
201         ctdb_sock_addr addr;
202         struct ctdb_tcp_array *tcparray;
203         struct ctdb_vnn *vnn;
204 };
205
206
207 /*
208   lists of tcp endpoints
209  */
210 struct ctdb_tcp_list {
211         struct ctdb_tcp_list *prev, *next;
212         struct ctdb_tcp_connection connection;
213 };
214
215 /*
216   list of clients to kill on IP release
217  */
218 struct ctdb_client_ip {
219         struct ctdb_client_ip *prev, *next;
220         struct ctdb_context *ctdb;
221         ctdb_sock_addr addr;
222         uint32_t client_id;
223 };
224
225
226 /*
227   send a gratuitous arp
228  */
229 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
230                                   struct timeval t, void *private_data)
231 {
232         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
233                                                         struct ctdb_takeover_arp);
234         int i, ret;
235         struct ctdb_tcp_array *tcparray;
236         const char *iface = ctdb_vnn_iface_string(arp->vnn);
237
238         ret = ctdb_sys_send_arp(&arp->addr, iface);
239         if (ret != 0) {
240                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
241                                   iface, strerror(errno)));
242         }
243
244         tcparray = arp->tcparray;
245         if (tcparray) {
246                 for (i=0;i<tcparray->num;i++) {
247                         struct ctdb_tcp_connection *tcon;
248
249                         tcon = &tcparray->connections[i];
250                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
251                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
252                                 ctdb_addr_to_str(&tcon->src_addr),
253                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
254                         ret = ctdb_sys_send_tcp(
255                                 &tcon->src_addr, 
256                                 &tcon->dst_addr,
257                                 0, 0, 0);
258                         if (ret != 0) {
259                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
260                                         ctdb_addr_to_str(&tcon->src_addr)));
261                         }
262                 }
263         }
264
265         arp->count++;
266
267         if (arp->count == CTDB_ARP_REPEAT) {
268                 talloc_free(arp);
269                 return;
270         }
271
272         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
273                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
274                         ctdb_control_send_arp, arp);
275 }
276
277 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
278                                        struct ctdb_vnn *vnn)
279 {
280         struct ctdb_takeover_arp *arp;
281         struct ctdb_tcp_array *tcparray;
282
283         if (!vnn->takeover_ctx) {
284                 vnn->takeover_ctx = talloc_new(vnn);
285                 if (!vnn->takeover_ctx) {
286                         return -1;
287                 }
288         }
289
290         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
291         if (!arp) {
292                 return -1;
293         }
294
295         arp->ctdb = ctdb;
296         arp->addr = vnn->public_address;
297         arp->vnn  = vnn;
298
299         tcparray = vnn->tcp_array;
300         if (tcparray) {
301                 /* add all of the known tcp connections for this IP to the
302                    list of tcp connections to send tickle acks for */
303                 arp->tcparray = talloc_steal(arp, tcparray);
304
305                 vnn->tcp_array = NULL;
306                 vnn->tcp_update_needed = true;
307         }
308
309         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
310                         timeval_zero(), ctdb_control_send_arp, arp);
311
312         return 0;
313 }
314
315 struct takeover_callback_state {
316         struct ctdb_req_control *c;
317         ctdb_sock_addr *addr;
318         struct ctdb_vnn *vnn;
319 };
320
/*
  state handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
	/* control request that is answered from the callback */
	struct ctdb_req_control *c;
	/* public address entry being taken over */
	struct ctdb_vnn *vnn;
};
325
326 /*
327   called when takeip event finishes
328  */
329 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
330                                     void *private_data)
331 {
332         struct ctdb_do_takeip_state *state =
333                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
334         int32_t ret;
335
336         if (status != 0) {
337                 if (status == -ETIME) {
338                         ctdb_ban_self(ctdb);
339                 }
340                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341                                  ctdb_addr_to_str(&state->vnn->public_address),
342                                  ctdb_vnn_iface_string(state->vnn)));
343                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
344                 talloc_free(state);
345                 return;
346         }
347
348         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
349         if (ret != 0) {
350                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
351                 talloc_free(state);
352                 return;
353         }
354
355         /* the control succeeded */
356         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
357         talloc_free(state);
358         return;
359 }
360
361 /*
362   take over an ip address
363  */
364 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
365                               struct ctdb_req_control *c,
366                               struct ctdb_vnn *vnn)
367 {
368         int ret;
369         struct ctdb_do_takeip_state *state;
370
371         ret = ctdb_vnn_assign_iface(ctdb, vnn);
372         if (ret != 0) {
373                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
374                                  "assin a usable interface\n",
375                                  ctdb_addr_to_str(&vnn->public_address),
376                                  vnn->public_netmask_bits));
377                 return -1;
378         }
379
380         state = talloc(vnn, struct ctdb_do_takeip_state);
381         CTDB_NO_MEMORY(ctdb, state);
382
383         state->c = talloc_steal(ctdb, c);
384         state->vnn   = vnn;
385
386         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
387                             ctdb_addr_to_str(&vnn->public_address),
388                             vnn->public_netmask_bits,
389                             ctdb_vnn_iface_string(vnn)));
390
391         ret = ctdb_event_script_callback(ctdb,
392                                          state,
393                                          ctdb_do_takeip_callback,
394                                          state,
395                                          false,
396                                          CTDB_EVENT_TAKE_IP,
397                                          "%s %s %u",
398                                          ctdb_vnn_iface_string(vnn),
399                                          ctdb_addr_to_str(&vnn->public_address),
400                                          vnn->public_netmask_bits);
401
402         if (ret != 0) {
403                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
404                         ctdb_addr_to_str(&vnn->public_address),
405                         ctdb_vnn_iface_string(vnn)));
406                 talloc_free(state);
407                 return -1;
408         }
409
410         return 0;
411 }
412
/*
  state handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
	/* control request that is answered from the callback */
	struct ctdb_req_control *c;
	/* interface the address is being moved away from */
	struct ctdb_iface *old;
	/* public address entry being moved */
	struct ctdb_vnn *vnn;
};
418
419 /*
420   called when updateip event finishes
421  */
422 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
423                                       void *private_data)
424 {
425         struct ctdb_do_updateip_state *state =
426                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
427         int32_t ret;
428
429         if (status != 0) {
430                 if (status == -ETIME) {
431                         ctdb_ban_self(ctdb);
432                 }
433                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
434                         ctdb_addr_to_str(&state->vnn->public_address),
435                         state->old->name,
436                         ctdb_vnn_iface_string(state->vnn)));
437
438                 /*
439                  * All we can do is reset the old interface
440                  * and let the next run fix it
441                  */
442                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
443                 state->vnn->iface = state->old;
444                 state->vnn->iface->references++;
445
446                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
447                 talloc_free(state);
448                 return;
449         }
450
451         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
452         if (ret != 0) {
453                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
454                 talloc_free(state);
455                 return;
456         }
457
458         /* the control succeeded */
459         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
460         talloc_free(state);
461         return;
462 }
463
464 /*
465   update (move) an ip address
466  */
467 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
468                                 struct ctdb_req_control *c,
469                                 struct ctdb_vnn *vnn)
470 {
471         int ret;
472         struct ctdb_do_updateip_state *state;
473         struct ctdb_iface *old = vnn->iface;
474
475         ctdb_vnn_unassign_iface(ctdb, vnn);
476         ret = ctdb_vnn_assign_iface(ctdb, vnn);
477         if (ret != 0) {
478                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
479                                  "assin a usable interface (old iface '%s')\n",
480                                  ctdb_addr_to_str(&vnn->public_address),
481                                  vnn->public_netmask_bits,
482                                  old->name));
483                 return -1;
484         }
485
486         if (vnn->iface == old) {
487                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
488                                  "assin a same interface '%s'\n",
489                                  ctdb_addr_to_str(&vnn->public_address),
490                                  vnn->public_netmask_bits,
491                                  old->name));
492                 return -1;
493         }
494
495         state = talloc(vnn, struct ctdb_do_updateip_state);
496         CTDB_NO_MEMORY(ctdb, state);
497
498         state->c = talloc_steal(ctdb, c);
499         state->old = old;
500         state->vnn = vnn;
501
502         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
503                             "interface %s to %s\n",
504                             ctdb_addr_to_str(&vnn->public_address),
505                             vnn->public_netmask_bits,
506                             old->name,
507                             ctdb_vnn_iface_string(vnn)));
508
509         ret = ctdb_event_script_callback(ctdb,
510                                          state,
511                                          ctdb_do_updateip_callback,
512                                          state,
513                                          false,
514                                          CTDB_EVENT_UPDATE_IP,
515                                          "%s %s %s %u",
516                                          state->old->name,
517                                          ctdb_vnn_iface_string(vnn),
518                                          ctdb_addr_to_str(&vnn->public_address),
519                                          vnn->public_netmask_bits);
520         if (ret != 0) {
521                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
522                                  ctdb_addr_to_str(&vnn->public_address),
523                                  old->name, ctdb_vnn_iface_string(vnn)));
524                 talloc_free(state);
525                 return -1;
526         }
527
528         return 0;
529 }
530
531 /*
532   Find the vnn of the node that has a public ip address
533   returns -1 if the address is not known as a public address
534  */
535 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
536 {
537         struct ctdb_vnn *vnn;
538
539         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
540                 if (ctdb_same_ip(&vnn->public_address, addr)) {
541                         return vnn;
542                 }
543         }
544
545         return NULL;
546 }
547
548 /*
549   take over an ip address
550  */
551 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
552                                  struct ctdb_req_control *c,
553                                  TDB_DATA indata,
554                                  bool *async_reply)
555 {
556         int ret;
557         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
558         struct ctdb_vnn *vnn;
559         bool have_ip = false;
560         bool do_updateip = false;
561         bool do_takeip = false;
562         struct ctdb_iface *best_iface = NULL;
563
564         if (pip->pnn != ctdb->pnn) {
565                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
566                                  "with pnn %d, but we're node %d\n",
567                                  ctdb_addr_to_str(&pip->addr),
568                                  pip->pnn, ctdb->pnn));
569                 return -1;
570         }
571
572         /* update out vnn list */
573         vnn = find_public_ip_vnn(ctdb, &pip->addr);
574         if (vnn == NULL) {
575                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
576                         ctdb_addr_to_str(&pip->addr)));
577                 return 0;
578         }
579
580         have_ip = ctdb_sys_have_ip(&pip->addr);
581         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
582         if (best_iface == NULL) {
583                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
584                                  "a usable interface (old %s, have_ip %d)\n",
585                                  ctdb_addr_to_str(&vnn->public_address),
586                                  vnn->public_netmask_bits,
587                                  ctdb_vnn_iface_string(vnn),
588                                  have_ip));
589                 return -1;
590         }
591
592         if (vnn->iface == NULL && have_ip) {
593                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
594                                   "but we have no interface assigned, has someone manually configured it?"
595                                   "banning ourself\n",
596                                  ctdb_addr_to_str(&vnn->public_address)));
597                 ctdb_ban_self(ctdb);
598                 return -1;
599         }
600
601         if (vnn->pnn != ctdb->pnn && have_ip) {
602                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
603                                   "and we have it on iface[%s], but it was assigned to node %d"
604                                   "and we are node %d, banning ourself\n",
605                                  ctdb_addr_to_str(&vnn->public_address),
606                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
607                 ctdb_ban_self(ctdb);
608                 return -1;
609         }
610
611         if (vnn->iface) {
612                 if (vnn->iface->link_up) {
613                         /* only move when the rebalance gains something */
614                         if (vnn->iface->references > (best_iface->references + 1)) {
615                                 do_updateip = true;
616                         }
617                 } else if (vnn->iface != best_iface) {
618                         do_updateip = true;
619                 }
620         }
621
622         if (!have_ip) {
623                 if (do_updateip) {
624                         ctdb_vnn_unassign_iface(ctdb, vnn);
625                         do_updateip = false;
626                 }
627                 do_takeip = true;
628         }
629
630         if (do_takeip) {
631                 ret = ctdb_do_takeip(ctdb, c, vnn);
632                 if (ret != 0) {
633                         return -1;
634                 }
635         } else if (do_updateip) {
636                 ret = ctdb_do_updateip(ctdb, c, vnn);
637                 if (ret != 0) {
638                         return -1;
639                 }
640         } else {
641                 /*
642                  * The interface is up and the kernel known the ip
643                  * => do nothing
644                  */
645                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
646                         ctdb_addr_to_str(&pip->addr),
647                         vnn->public_netmask_bits,
648                         ctdb_vnn_iface_string(vnn)));
649                 return 0;
650         }
651
652         /* tell ctdb_control.c that we will be replying asynchronously */
653         *async_reply = true;
654
655         return 0;
656 }
657
658 /*
659   takeover an ip address old v4 style
660  */
661 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
662                                 struct ctdb_req_control *c,
663                                 TDB_DATA indata, 
664                                 bool *async_reply)
665 {
666         TDB_DATA data;
667         
668         data.dsize = sizeof(struct ctdb_public_ip);
669         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
670         CTDB_NO_MEMORY(ctdb, data.dptr);
671         
672         memcpy(data.dptr, indata.dptr, indata.dsize);
673         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
674 }
675
676 /*
677   kill any clients that are registered with a IP that is being released
678  */
679 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
680 {
681         struct ctdb_client_ip *ip;
682
683         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
684                 ctdb_addr_to_str(addr)));
685
686         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
687                 ctdb_sock_addr tmp_addr;
688
689                 tmp_addr = ip->addr;
690                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
691                         ip->client_id,
692                         ctdb_addr_to_str(&ip->addr)));
693
694                 if (ctdb_same_ip(&tmp_addr, addr)) {
695                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
696                                                                      ip->client_id, 
697                                                                      struct ctdb_client);
698                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
699                                 ip->client_id,
700                                 ctdb_addr_to_str(&ip->addr),
701                                 client->pid));
702
703                         if (client->pid != 0) {
704                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
705                                         (unsigned)client->pid,
706                                         ctdb_addr_to_str(addr),
707                                         ip->client_id));
708                                 kill(client->pid, SIGKILL);
709                         }
710                 }
711         }
712 }
713
714 /*
715   called when releaseip event finishes
716  */
717 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
718                                 void *private_data)
719 {
720         struct takeover_callback_state *state = 
721                 talloc_get_type(private_data, struct takeover_callback_state);
722         TDB_DATA data;
723
724         if (status == -ETIME) {
725                 ctdb_ban_self(ctdb);
726         }
727
728         /* send a message to all clients of this node telling them
729            that the cluster has been reconfigured and they should
730            release any sockets on this IP */
731         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
732         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
733         data.dsize = strlen((char *)data.dptr)+1;
734
735         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
736
737         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
738
739         /* kill clients that have registered with this IP */
740         release_kill_clients(ctdb, state->addr);
741
742         ctdb_vnn_unassign_iface(ctdb, state->vnn);
743
744         /* the control succeeded */
745         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
746         talloc_free(state);
747 }
748
749 /*
750   release an ip address
751  */
752 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
753                                 struct ctdb_req_control *c,
754                                 TDB_DATA indata, 
755                                 bool *async_reply)
756 {
757         int ret;
758         struct takeover_callback_state *state;
759         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
760         struct ctdb_vnn *vnn;
761
762         /* update our vnn list */
763         vnn = find_public_ip_vnn(ctdb, &pip->addr);
764         if (vnn == NULL) {
765                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
766                         ctdb_addr_to_str(&pip->addr)));
767                 return 0;
768         }
769         vnn->pnn = pip->pnn;
770
771         /* stop any previous arps */
772         talloc_free(vnn->takeover_ctx);
773         vnn->takeover_ctx = NULL;
774
775         if (!ctdb_sys_have_ip(&pip->addr)) {
776                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
777                         ctdb_addr_to_str(&pip->addr),
778                         vnn->public_netmask_bits, 
779                         ctdb_vnn_iface_string(vnn)));
780                 ctdb_vnn_unassign_iface(ctdb, vnn);
781                 return 0;
782         }
783
784         if (vnn->iface == NULL) {
785                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
786                                   "but we have no interface assigned, has someone manually configured it?"
787                                   "banning ourself\n",
788                                  ctdb_addr_to_str(&vnn->public_address)));
789                 ctdb_ban_self(ctdb);
790                 return -1;
791         }
792
793         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
794                 ctdb_addr_to_str(&pip->addr),
795                 vnn->public_netmask_bits, 
796                 ctdb_vnn_iface_string(vnn),
797                 pip->pnn));
798
799         state = talloc(ctdb, struct takeover_callback_state);
800         CTDB_NO_MEMORY(ctdb, state);
801
802         state->c = talloc_steal(state, c);
803         state->addr = talloc(state, ctdb_sock_addr);       
804         CTDB_NO_MEMORY(ctdb, state->addr);
805         *state->addr = pip->addr;
806         state->vnn   = vnn;
807
808         ret = ctdb_event_script_callback(ctdb, 
809                                          state, release_ip_callback, state,
810                                          false,
811                                          CTDB_EVENT_RELEASE_IP,
812                                          "%s %s %u",
813                                          ctdb_vnn_iface_string(vnn),
814                                          ctdb_addr_to_str(&pip->addr),
815                                          vnn->public_netmask_bits);
816         if (ret != 0) {
817                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
818                         ctdb_addr_to_str(&pip->addr),
819                         ctdb_vnn_iface_string(vnn)));
820                 talloc_free(state);
821                 return -1;
822         }
823
824         /* tell the control that we will be reply asynchronously */
825         *async_reply = true;
826         return 0;
827 }
828
829 /*
830   release an ip address old v4 style
831  */
832 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
833                                 struct ctdb_req_control *c,
834                                 TDB_DATA indata, 
835                                 bool *async_reply)
836 {
837         TDB_DATA data;
838         
839         data.dsize = sizeof(struct ctdb_public_ip);
840         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
841         CTDB_NO_MEMORY(ctdb, data.dptr);
842         
843         memcpy(data.dptr, indata.dptr, indata.dsize);
844         return ctdb_control_release_ip(ctdb, c, data, async_reply);
845 }
846
847
848 static int ctdb_add_public_address(struct ctdb_context *ctdb,
849                                    ctdb_sock_addr *addr,
850                                    unsigned mask, const char *ifaces)
851 {
852         struct ctdb_vnn      *vnn;
853         uint32_t num = 0;
854         char *tmp;
855         const char *iface;
856         int i;
857         int ret;
858
859         /* Verify that we dont have an entry for this ip yet */
860         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
861                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
862                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
863                                 ctdb_addr_to_str(addr)));
864                         return -1;
865                 }               
866         }
867
868         /* create a new vnn structure for this ip address */
869         vnn = talloc_zero(ctdb, struct ctdb_vnn);
870         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
871         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
872         tmp = talloc_strdup(vnn, ifaces);
873         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
874         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
875                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
876                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
877                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
878                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
879                 num++;
880         }
881         talloc_free(tmp);
882         vnn->ifaces[num] = NULL;
883         vnn->public_address      = *addr;
884         vnn->public_netmask_bits = mask;
885         vnn->pnn                 = -1;
886
887         for (i=0; vnn->ifaces[i]; i++) {
888                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
889                 if (ret != 0) {
890                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
891                                            "for public_address[%s]\n",
892                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
893                         talloc_free(vnn);
894                         return -1;
895                 }
896         }
897
898         DLIST_ADD(ctdb->vnn, vnn);
899
900         return 0;
901 }
902
/*
  setup the event script directory

  Stores a talloc copy of 'script_dir', allocated with 'ctdb' as its
  parent, in ctdb->event_script_dir.  Returns 0 once the copy is stored;
  a failed copy is handled by CTDB_NO_MEMORY.
*/
int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
{
        ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
        CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
        return 0;
}
912
913 /*
914   setup the public address lists from a file
915 */
916 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
917 {
918         char **lines;
919         int nlines;
920         int i;
921
922         lines = file_lines_load(alist, &nlines, ctdb);
923         if (lines == NULL) {
924                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
925                 return -1;
926         }
927         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
928                 nlines--;
929         }
930
931         for (i=0;i<nlines;i++) {
932                 unsigned mask;
933                 ctdb_sock_addr addr;
934                 const char *addrstr;
935                 const char *ifaces;
936                 char *tok, *line;
937
938                 line = lines[i];
939                 while ((*line == ' ') || (*line == '\t')) {
940                         line++;
941                 }
942                 if (*line == '#') {
943                         continue;
944                 }
945                 if (strcmp(line, "") == 0) {
946                         continue;
947                 }
948                 tok = strtok(line, " \t");
949                 addrstr = tok;
950                 tok = strtok(NULL, " \t");
951                 if (tok == NULL) {
952                         if (NULL == ctdb->default_public_interface) {
953                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
954                                          i+1));
955                                 talloc_free(lines);
956                                 return -1;
957                         }
958                         ifaces = ctdb->default_public_interface;
959                 } else {
960                         ifaces = tok;
961                 }
962
963                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
964                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
965                         talloc_free(lines);
966                         return -1;
967                 }
968                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
969                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
970                         talloc_free(lines);
971                         return -1;
972                 }
973         }
974
975         talloc_free(lines);
976         return 0;
977 }
978
979 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
980                               const char *iface,
981                               const char *ip)
982 {
983         struct ctdb_vnn *svnn;
984         bool ok;
985         int ret;
986
987         svnn = talloc_zero(ctdb, struct ctdb_vnn);
988         CTDB_NO_MEMORY(ctdb, svnn);
989
990         svnn->ifaces = talloc_array(svnn, const char *, 2);
991         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
992         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
993         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
994         svnn->ifaces[1] = NULL;
995
996         ok = parse_ip(ip, iface, 0, &svnn->public_address);
997         if (!ok) {
998                 talloc_free(svnn);
999                 return -1;
1000         }
1001
1002         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1003         if (ret != 0) {
1004                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1005                                    "for single_ip[%s]\n",
1006                                    svnn->ifaces[0],
1007                                    ctdb_addr_to_str(&svnn->public_address)));
1008                 talloc_free(svnn);
1009                 return -1;
1010         }
1011
1012         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1013         if (ret != 0) {
1014                 talloc_free(svnn);
1015                 return -1;
1016         }
1017
1018         ctdb->single_ip_vnn = svnn;
1019         return 0;
1020 }
1021
/* One public address in the singly linked list that the takeover code
   in this file builds and walks.
*/
struct ctdb_public_ip_list {
        /* next entry in the list, NULL at the end */
        struct ctdb_public_ip_list *next;
        /* node the address is assigned to; the code in this file stores
           and compares -1 here for "no node"
        */
        uint32_t pnn;
        /* the public address itself */
        ctdb_sock_addr addr;
};
1027
1028
1029 /* Given a physical node, return the number of
1030    public addresses that is currently assigned to this node.
1031 */
1032 static int node_ip_coverage(struct ctdb_context *ctdb, 
1033         int32_t pnn,
1034         struct ctdb_public_ip_list *ips)
1035 {
1036         int num=0;
1037
1038         for (;ips;ips=ips->next) {
1039                 if (ips->pnn == pnn) {
1040                         num++;
1041                 }
1042         }
1043         return num;
1044 }
1045
1046
1047 /* Check if this is a public ip known to the node, i.e. can that
1048    node takeover this ip ?
1049 */
1050 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1051                 struct ctdb_public_ip_list *ip)
1052 {
1053         struct ctdb_all_public_ips *public_ips;
1054         int i;
1055
1056         public_ips = ctdb->nodes[pnn]->available_public_ips;
1057
1058         if (public_ips == NULL) {
1059                 return -1;
1060         }
1061
1062         for (i=0;i<public_ips->num;i++) {
1063                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1064                         /* yes, this node can serve this public ip */
1065                         return 0;
1066                 }
1067         }
1068
1069         return -1;
1070 }
1071
1072
1073 /* search the node lists list for a node to takeover this ip.
1074    pick the node that currently are serving the least number of ips
1075    so that the ips get spread out evenly.
1076 */
1077 static int find_takeover_node(struct ctdb_context *ctdb, 
1078                 struct ctdb_node_map *nodemap, uint32_t mask, 
1079                 struct ctdb_public_ip_list *ip,
1080                 struct ctdb_public_ip_list *all_ips)
1081 {
1082         int pnn, min=0, num;
1083         int i;
1084
1085         pnn    = -1;
1086         for (i=0;i<nodemap->num;i++) {
1087                 if (nodemap->nodes[i].flags & mask) {
1088                         /* This node is not healty and can not be used to serve
1089                            a public address 
1090                         */
1091                         continue;
1092                 }
1093
1094                 /* verify that this node can serve this ip */
1095                 if (can_node_serve_ip(ctdb, i, ip)) {
1096                         /* no it couldnt   so skip to the next node */
1097                         continue;
1098                 }
1099
1100                 num = node_ip_coverage(ctdb, i, all_ips);
1101                 /* was this the first node we checked ? */
1102                 if (pnn == -1) {
1103                         pnn = i;
1104                         min  = num;
1105                 } else {
1106                         if (num < min) {
1107                                 pnn = i;
1108                                 min  = num;
1109                         }
1110                 }
1111         }       
1112         if (pnn == -1) {
1113                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1114                         ctdb_addr_to_str(&ip->addr)));
1115
1116                 return -1;
1117         }
1118
1119         ip->pnn = pnn;
1120         return 0;
1121 }
1122
1123 #define IP_KEYLEN       4
1124 static uint32_t *ip_key(ctdb_sock_addr *ip)
1125 {
1126         static uint32_t key[IP_KEYLEN];
1127
1128         bzero(key, sizeof(key));
1129
1130         switch (ip->sa.sa_family) {
1131         case AF_INET:
1132                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1133                 break;
1134         case AF_INET6:
1135                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1136                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1137                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1138                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1139                 break;
1140         default:
1141                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1142                 return key;
1143         }
1144
1145         return key;
1146 }
1147
/* Callback handed to trbt_insertarray32_callback() by
   create_merged_ip_list(): returns 'parm' unchanged and ignores 'data'.
   NOTE(review): presumably the tree stores the returned pointer as the
   data of the node, so a later insert for the same key wins - confirm
   against common/rb_tree.c.
*/
static void *add_ip_callback(void *parm, void *data)
{
        return parm;
}
1152
1153 void getips_count_callback(void *param, void *data)
1154 {
1155         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1156         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1157
1158         new_ip->next = *ip_list;
1159         *ip_list     = new_ip;
1160 }
1161
1162 static struct ctdb_public_ip_list *
1163 create_merged_ip_list(struct ctdb_context *ctdb)
1164 {
1165         int i, j;
1166         struct ctdb_public_ip_list *ip_list;
1167         struct ctdb_all_public_ips *public_ips;
1168
1169         if (ctdb->ip_tree != NULL) {
1170                 talloc_free(ctdb->ip_tree);
1171                 ctdb->ip_tree = NULL;
1172         }
1173         ctdb->ip_tree = trbt_create(ctdb, 0);
1174
1175         for (i=0;i<ctdb->num_nodes;i++) {
1176                 public_ips = ctdb->nodes[i]->known_public_ips;
1177
1178                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1179                         continue;
1180                 }
1181
1182                 /* there were no public ips for this node */
1183                 if (public_ips == NULL) {
1184                         continue;
1185                 }               
1186
1187                 for (j=0;j<public_ips->num;j++) {
1188                         struct ctdb_public_ip_list *tmp_ip; 
1189
1190                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1191                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1192                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1193                         tmp_ip->addr = public_ips->ips[j].addr;
1194                         tmp_ip->next = NULL;
1195
1196                         trbt_insertarray32_callback(ctdb->ip_tree,
1197                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1198                                 add_ip_callback,
1199                                 tmp_ip);
1200                 }
1201         }
1202
1203         ip_list = NULL;
1204         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1205
1206         return ip_list;
1207 }
1208
/*
  make any IP alias changes for public addresses that are necessary 

  Decides which node should hold each public address found in the merged
  list of all nodes, then sends a RELEASE_IP control for every address to
  every connected node that should not hold it, waits for the replies,
  and finally sends a TAKEOVER_IP control for every assigned address to
  the node that should hold it.

  'nodemap' supplies the node flags used to decide which nodes may serve
  addresses and which nodes are talked to.

  Returns 0 on success and -1 if a control could not be sent or if
  waiting for either batch of controls fails.
 */
int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
{
        int i, num_healthy, retries;
        struct ctdb_public_ip ip;
        struct ctdb_public_ipv4 ipv4;
        uint32_t mask;
        struct ctdb_public_ip_list *all_ips, *tmp_ip;
        int maxnode, maxnum=0, minnode, minnum=0, num;
        TDB_DATA data;
        struct client_async_data *async_data;
        struct ctdb_client_control_state *state;
        /* parent of the async bookkeeping below, freed before every
           return statement in this function
        */
        TALLOC_CTX *tmp_ctx = talloc_new(ctdb);


        ZERO_STRUCT(ip);

        /* Count how many completely healthy nodes we have */
        num_healthy = 0;
        for (i=0;i<nodemap->num;i++) {
                if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
                        num_healthy++;
                }
        }

        if (num_healthy > 0) {
                /* We have healthy nodes, so only consider them for 
                   serving public addresses
                */
                mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
        } else {
                /* We didnt have any completely healthy nodes so
                   use "disabled" nodes as a fallback
                */
                mask = NODE_FLAGS_INACTIVE;
        }

        /* since nodes only know about those public addresses that
           can be served by that particular node, no single node has
           a full list of all public addresses that exist in the cluster.
           Walk over all node structures and create a merged list of
           all public addresses that exist in the cluster.

           keep the tree of ips around as ctdb->ip_tree
        */
        all_ips = create_merged_ip_list(ctdb);

        /* If we want deterministic ip allocations, i.e. that the ip addresses
           will always be allocated the same way for a specific set of
           available/unavailable nodes.
           The addresses are handed out round robin, in list order, over
           the node numbers 0 .. nodemap->num-1.
        */
        if (1 == ctdb->tunable.deterministic_public_ips) {              
                DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
                for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
                        tmp_ip->pnn = i%nodemap->num;
                }
        }


        /* mark all public addresses with a masked node as being served by
           node -1
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }
                if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
                        tmp_ip->pnn = -1;
                }
        }

        /* verify that the assigned nodes can serve that public ip
           and set it to -1 if not
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }
                if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
                        /* this node can not serve this ip. */
                        tmp_ip->pnn = -1;
                }
        }


        /* now we must redistribute all public addresses with takeover node
           -1 among the nodes available.
           retries counts how often the balancing step further down has
           jumped back to try_again.
        */
        retries = 0;
try_again:
        /* loop over all ip's and find a physical node to cover for 
           each unassigned ip.
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
                                DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
                                        ctdb_addr_to_str(&tmp_ip->addr)));
                        }
                }
        }

        /* If we dont want ips to fail back after a node becomes healthy
           again, we wont even try to reallocate the ip addresses so that
           they are evenly spread out.
           This can NOT be used at the same time as DeterministicIPs !
        */
        if (1 == ctdb->tunable.no_ip_failback) {
                if (1 == ctdb->tunable.deterministic_public_ips) {
                        DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
                }
                goto finished;
        }


        /* now, try to make sure the ip addresses are evenly distributed
           across the node.
           for each ip address, loop over all nodes that can serve this
           ip and make sure that the difference between the node
           serving the most and the node serving the least ip's are not greater
           than 1.
        */
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        continue;
                }

                /* Get the highest and lowest number of ips's served by any 
                   valid node which can serve this ip.
                */
                maxnode = -1;
                minnode = -1;
                for (i=0;i<nodemap->num;i++) {
                        if (nodemap->nodes[i].flags & mask) {
                                continue;
                        }

                        /* only check nodes that can actually serve this ip */
                        if (can_node_serve_ip(ctdb, i, tmp_ip)) {
                                /* no it couldnt   so skip to the next node */
                                continue;
                        }

                        num = node_ip_coverage(ctdb, i, all_ips);
                        if (maxnode == -1) {
                                maxnode = i;
                                maxnum  = num;
                        } else {
                                if (num > maxnum) {
                                        maxnode = i;
                                        maxnum  = num;
                                }
                        }
                        if (minnode == -1) {
                                minnode = i;
                                minnum  = num;
                        } else {
                                if (num < minnum) {
                                        minnode = i;
                                        minnum  = num;
                                }
                        }
                }
                if (maxnode == -1) {
                        DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
                                ctdb_addr_to_str(&tmp_ip->addr)));

                        continue;
                }

                /* If we want deterministic IPs then dont try to reallocate 
                   them to spread out the load.
                */
                if (1 == ctdb->tunable.deterministic_public_ips) {
                        continue;
                }

                /* if the spread between the smallest and largest coverage by
                   a node is >=2 we steal one of the ips from the node with
                   most coverage to even things out a bit.
                   try to do this at most 5 times  since we dont want to spend
                   too much time balancing the ip coverage.
                */
                if ( (maxnum > minnum+1)
                  && (retries < 5) ){
                        struct ctdb_public_ip_list *tmp;

                        /* mark one of maxnode's vnn's as unassigned and try
                           again
                        */
                        for (tmp=all_ips;tmp;tmp=tmp->next) {
                                if (tmp->pnn == maxnode) {
                                        tmp->pnn = -1;
                                        retries++;
                                        goto try_again;
                                }
                        }
                }
        }


        /* finished distributing the public addresses, now just send the 
           info out to the nodes
        */
finished:

        /* at this point ->pnn is the node which will own each IP
           or -1 if there is no node that can cover this ip
        */

        /* now tell all nodes to delete any alias that they should not
           have.  This will be a NOOP on nodes that don't currently
           hold the given alias */
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);

        for (i=0;i<nodemap->num;i++) {
                /* don't talk to unconnected nodes, but do talk to banned nodes */
                if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
                        continue;
                }

                for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                        if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
                                /* This node should be serving this
                                   vnn so dont tell it to release the ip
                                */
                                continue;
                        }
                        if (tmp_ip->addr.sa.sa_family == AF_INET) {
                                /* ipv4 addresses are sent with the old
                                   style structure and control
                                */
                                ipv4.pnn = tmp_ip->pnn;
                                ipv4.sin = tmp_ip->addr.ip;

                                
                                data.dsize = sizeof(ipv4);
                                data.dptr  = (uint8_t *)&ipv4;
                                state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
                                                0, CTDB_CONTROL_RELEASE_IPv4, 0,
                                                data, async_data,
                                                NULL);
                                if (state != NULL) {
                                        /* schedule ctdb_control_timeout_func for
                                           this control at TAKEOVER_TIMEOUT()
                                        */
                                        event_add_timed(ctdb->ev, state, TAKEOVER_TIMEOUT(), ctdb_control_timeout_func, state);
                                }
                        } else {
                                ip.pnn  = tmp_ip->pnn;
                                ip.addr = tmp_ip->addr;

                                data.dsize = sizeof(ip);
                                data.dptr  = (uint8_t *)&ip;
                                state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
                                                0, CTDB_CONTROL_RELEASE_IP, 0,
                                                data, async_data,
                                                NULL);
                                if (state != NULL) {
                                        /* same timeout handling as for ipv4 */
                                        event_add_timed(ctdb->ev, state, TAKEOVER_TIMEOUT(), ctdb_control_timeout_func, state);
                                }
                        }

                        /* covers both branches above */
                        if (state == NULL) {
                                DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
                                talloc_free(tmp_ctx);
                                return -1;
                        }
                
                        ctdb_client_async_add(async_data, state);
                }
        }
        /* all releases must be answered before any takeover is sent */
        if (ctdb_client_async_wait(ctdb, async_data) != 0) {
                DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
                talloc_free(tmp_ctx);
                return -1;
        }
        talloc_free(async_data);


        /* tell all nodes to get their own IPs */
        async_data = talloc_zero(tmp_ctx, struct client_async_data);
        CTDB_NO_MEMORY_FATAL(ctdb, async_data);
        for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
                if (tmp_ip->pnn == -1) {
                        /* this IP won't be taken over */
                        continue;
                }

                if (tmp_ip->addr.sa.sa_family == AF_INET) {
                        /* old style structure and control for ipv4 */
                        ipv4.pnn = tmp_ip->pnn;
                        ipv4.sin = tmp_ip->addr.ip;

                        data.dsize = sizeof(ipv4);
                        data.dptr  = (uint8_t *)&ipv4;
                        state = ctdb_control_send(ctdb, tmp_ip->pnn,
                                        0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
                                        data, async_data,
                                        NULL);
                        if (state != NULL) {
                                event_add_timed(ctdb->ev, state, TAKEOVER_TIMEOUT(), ctdb_control_timeout_func, state);
                        }
                } else {
                        ip.pnn  = tmp_ip->pnn;
                        ip.addr = tmp_ip->addr;

                        data.dsize = sizeof(ip);
                        data.dptr  = (uint8_t *)&ip;
                        state = ctdb_control_send(ctdb, tmp_ip->pnn,
                                        0, CTDB_CONTROL_TAKEOVER_IP, 0,
                                        data, async_data,
                                        NULL);
                        if (state != NULL) {
                                event_add_timed(ctdb->ev, state, TAKEOVER_TIMEOUT(), ctdb_control_timeout_func, state);
                        }
                }
                /* covers both branches above */
                if (state == NULL) {
                        DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
                        talloc_free(tmp_ctx);
                        return -1;
                }
                
                ctdb_client_async_add(async_data, state);
        }
        if (ctdb_client_async_wait(ctdb, async_data) != 0) {
                DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
                talloc_free(tmp_ctx);
                return -1;
        }

        talloc_free(tmp_ctx);
        return 0;
}
1539
1540
1541 /*
1542   destroy a ctdb_client_ip structure
1543  */
1544 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1545 {
1546         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1547                 ctdb_addr_to_str(&ip->addr),
1548                 ntohs(ip->addr.ip.sin_port),
1549                 ip->client_id));
1550
1551         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1552         return 0;
1553 }
1554
1555 /*
1556   called by a client to inform us of a TCP connection that it is managing
  that should be tickled with an ACK when IP takeover is done
1558   we handle both the old ipv4 style of packets as well as the new ipv4/6
1559   pdus.
1560  */
1561 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1562                                 TDB_DATA indata)
1563 {
1564         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1565         struct ctdb_control_tcp *old_addr = NULL;
1566         struct ctdb_control_tcp_addr new_addr;
1567         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1568         struct ctdb_tcp_list *tcp;
1569         struct ctdb_control_tcp_vnn t;
1570         int ret;
1571         TDB_DATA data;
1572         struct ctdb_client_ip *ip;
1573         struct ctdb_vnn *vnn;
1574         ctdb_sock_addr addr;
1575
1576         switch (indata.dsize) {
1577         case sizeof(struct ctdb_control_tcp):
1578                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1579                 ZERO_STRUCT(new_addr);
1580                 tcp_sock = &new_addr;
1581                 tcp_sock->src.ip  = old_addr->src;
1582                 tcp_sock->dest.ip = old_addr->dest;
1583                 break;
1584         case sizeof(struct ctdb_control_tcp_addr):
1585                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1586                 break;
1587         default:
1588                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1589                                  "to ctdb_control_tcp_client. size was %d but "
1590                                  "only allowed sizes are %lu and %lu\n",
1591                                  (int)indata.dsize,
1592                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1593                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1594                 return -1;
1595         }
1596
1597         addr = tcp_sock->src;
1598         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1599         addr = tcp_sock->dest;
1600         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1601
1602         ZERO_STRUCT(addr);
1603         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1604         vnn = find_public_ip_vnn(ctdb, &addr);
1605         if (vnn == NULL) {
1606                 switch (addr.sa.sa_family) {
1607                 case AF_INET:
1608                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1609                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1610                                         ctdb_addr_to_str(&addr)));
1611                         }
1612                         break;
1613                 case AF_INET6:
1614                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1615                                 ctdb_addr_to_str(&addr)));
1616                         break;
1617                 default:
1618                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1619                 }
1620
1621                 return 0;
1622         }
1623
1624         if (vnn->pnn != ctdb->pnn) {
1625                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1626                         ctdb_addr_to_str(&addr),
1627                         client_id, client->pid));
1628                 /* failing this call will tell smbd to die */
1629                 return -1;
1630         }
1631
1632         ip = talloc(client, struct ctdb_client_ip);
1633         CTDB_NO_MEMORY(ctdb, ip);
1634
1635         ip->ctdb      = ctdb;
1636         ip->addr      = addr;
1637         ip->client_id = client_id;
1638         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1639         DLIST_ADD(ctdb->client_ip_list, ip);
1640
1641         tcp = talloc(client, struct ctdb_tcp_list);
1642         CTDB_NO_MEMORY(ctdb, tcp);
1643
1644         tcp->connection.src_addr = tcp_sock->src;
1645         tcp->connection.dst_addr = tcp_sock->dest;
1646
1647         DLIST_ADD(client->tcp_list, tcp);
1648
1649         t.src  = tcp_sock->src;
1650         t.dest = tcp_sock->dest;
1651
1652         data.dptr = (uint8_t *)&t;
1653         data.dsize = sizeof(t);
1654
1655         switch (addr.sa.sa_family) {
1656         case AF_INET:
1657                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1658                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1659                         ctdb_addr_to_str(&tcp_sock->src),
1660                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1661                 break;
1662         case AF_INET6:
1663                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1664                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1665                         ctdb_addr_to_str(&tcp_sock->src),
1666                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1667                 break;
1668         default:
1669                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1670         }
1671
1672
1673         /* tell all nodes about this tcp connection */
1674         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1675                                        CTDB_CONTROL_TCP_ADD,
1676                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1677         if (ret != 0) {
1678                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1679                 return -1;
1680         }
1681
1682         return 0;
1683 }
1684
1685 /*
1686   find a tcp address on a list
1687  */
1688 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1689                                            struct ctdb_tcp_connection *tcp)
1690 {
1691         int i;
1692
1693         if (array == NULL) {
1694                 return NULL;
1695         }
1696
1697         for (i=0;i<array->num;i++) {
1698                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1699                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1700                         return &array->connections[i];
1701                 }
1702         }
1703         return NULL;
1704 }
1705
1706 /*
1707   called by a daemon to inform us of a TCP connection that one of its
1708   clients managing that should tickled with an ACK when IP takeover is
1709   done
1710  */
1711 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1712 {
1713         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1714         struct ctdb_tcp_array *tcparray;
1715         struct ctdb_tcp_connection tcp;
1716         struct ctdb_vnn *vnn;
1717
1718         vnn = find_public_ip_vnn(ctdb, &p->dest);
1719         if (vnn == NULL) {
1720                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1721                         ctdb_addr_to_str(&p->dest)));
1722
1723                 return -1;
1724         }
1725
1726
1727         tcparray = vnn->tcp_array;
1728
1729         /* If this is the first tickle */
1730         if (tcparray == NULL) {
1731                 tcparray = talloc_size(ctdb->nodes, 
1732                         offsetof(struct ctdb_tcp_array, connections) +
1733                         sizeof(struct ctdb_tcp_connection) * 1);
1734                 CTDB_NO_MEMORY(ctdb, tcparray);
1735                 vnn->tcp_array = tcparray;
1736
1737                 tcparray->num = 0;
1738                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1739                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1740
1741                 tcparray->connections[tcparray->num].src_addr = p->src;
1742                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1743                 tcparray->num++;
1744                 return 0;
1745         }
1746
1747
1748         /* Do we already have this tickle ?*/
1749         tcp.src_addr = p->src;
1750         tcp.dst_addr = p->dest;
1751         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1752                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1753                         ctdb_addr_to_str(&tcp.dst_addr),
1754                         ntohs(tcp.dst_addr.ip.sin_port),
1755                         vnn->pnn));
1756                 return 0;
1757         }
1758
1759         /* A new tickle, we must add it to the array */
1760         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1761                                         struct ctdb_tcp_connection,
1762                                         tcparray->num+1);
1763         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1764
1765         vnn->tcp_array = tcparray;
1766         tcparray->connections[tcparray->num].src_addr = p->src;
1767         tcparray->connections[tcparray->num].dst_addr = p->dest;
1768         tcparray->num++;
1769                                 
1770         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1771                 ctdb_addr_to_str(&tcp.dst_addr),
1772                 ntohs(tcp.dst_addr.ip.sin_port),
1773                 vnn->pnn));
1774
1775         return 0;
1776 }
1777
1778
1779 /*
1780   called by a daemon to inform us of a TCP connection that one of its
1781   clients managing that should tickled with an ACK when IP takeover is
1782   done
1783  */
1784 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1785 {
1786         struct ctdb_tcp_connection *tcpp;
1787         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1788
1789         if (vnn == NULL) {
1790                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1791                         ctdb_addr_to_str(&conn->dst_addr)));
1792                 return;
1793         }
1794
1795         /* if the array is empty we cant remove it
1796            and we dont need to do anything
1797          */
1798         if (vnn->tcp_array == NULL) {
1799                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1800                         ctdb_addr_to_str(&conn->dst_addr),
1801                         ntohs(conn->dst_addr.ip.sin_port)));
1802                 return;
1803         }
1804
1805
1806         /* See if we know this connection
1807            if we dont know this connection  then we dont need to do anything
1808          */
1809         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1810         if (tcpp == NULL) {
1811                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1812                         ctdb_addr_to_str(&conn->dst_addr),
1813                         ntohs(conn->dst_addr.ip.sin_port)));
1814                 return;
1815         }
1816
1817
1818         /* We need to remove this entry from the array.
1819            Instead of allocating a new array and copying data to it
1820            we cheat and just copy the last entry in the existing array
1821            to the entry that is to be removed and just shring the 
1822            ->num field
1823          */
1824         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1825         vnn->tcp_array->num--;
1826
1827         /* If we deleted the last entry we also need to remove the entire array
1828          */
1829         if (vnn->tcp_array->num == 0) {
1830                 talloc_free(vnn->tcp_array);
1831                 vnn->tcp_array = NULL;
1832         }               
1833
1834         vnn->tcp_update_needed = true;
1835
1836         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1837                 ctdb_addr_to_str(&conn->src_addr),
1838                 ntohs(conn->src_addr.ip.sin_port)));
1839 }
1840
1841
/*
  called when a daemon restarts - intended to send all tickles for all
  public addresses we are serving immediately to the new node.
  currently a stub that ignores its arguments and always returns 0.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        (void)ctdb;
        (void)vnn;

        return 0;
}
1851
1852
1853 /*
1854   called when a client structure goes away - hook to remove
1855   elements from the tcp_list in all daemons
1856  */
1857 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1858 {
1859         while (client->tcp_list) {
1860                 struct ctdb_tcp_list *tcp = client->tcp_list;
1861                 DLIST_REMOVE(client->tcp_list, tcp);
1862                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1863         }
1864 }
1865
1866
1867 /*
1868   release all IPs on shutdown
1869  */
1870 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1871 {
1872         struct ctdb_vnn *vnn;
1873
1874         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1875                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1876                         ctdb_vnn_unassign_iface(ctdb, vnn);
1877                         continue;
1878                 }
1879                 if (!vnn->iface) {
1880                         continue;
1881                 }
1882                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1883                                   ctdb_vnn_iface_string(vnn),
1884                                   ctdb_addr_to_str(&vnn->public_address),
1885                                   vnn->public_netmask_bits);
1886                 release_kill_clients(ctdb, &vnn->public_address);
1887                 ctdb_vnn_unassign_iface(ctdb, vnn);
1888         }
1889 }
1890
1891
1892 /*
1893   get list of public IPs
1894  */
1895 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1896                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1897 {
1898         int i, num, len;
1899         struct ctdb_all_public_ips *ips;
1900         struct ctdb_vnn *vnn;
1901         bool only_available = false;
1902
1903         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1904                 only_available = true;
1905         }
1906
1907         /* count how many public ip structures we have */
1908         num = 0;
1909         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1910                 num++;
1911         }
1912
1913         len = offsetof(struct ctdb_all_public_ips, ips) + 
1914                 num*sizeof(struct ctdb_public_ip);
1915         ips = talloc_zero_size(outdata, len);
1916         CTDB_NO_MEMORY(ctdb, ips);
1917
1918         i = 0;
1919         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1920                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1921                         continue;
1922                 }
1923                 ips->ips[i].pnn  = vnn->pnn;
1924                 ips->ips[i].addr = vnn->public_address;
1925                 i++;
1926         }
1927         ips->num = i;
1928         len = offsetof(struct ctdb_all_public_ips, ips) +
1929                 i*sizeof(struct ctdb_public_ip);
1930
1931         outdata->dsize = len;
1932         outdata->dptr  = (uint8_t *)ips;
1933
1934         return 0;
1935 }
1936
1937
1938 /*
1939   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1940  */
1941 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1942                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1943 {
1944         int i, num, len;
1945         struct ctdb_all_public_ipsv4 *ips;
1946         struct ctdb_vnn *vnn;
1947
1948         /* count how many public ip structures we have */
1949         num = 0;
1950         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1951                 if (vnn->public_address.sa.sa_family != AF_INET) {
1952                         continue;
1953                 }
1954                 num++;
1955         }
1956
1957         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1958                 num*sizeof(struct ctdb_public_ipv4);
1959         ips = talloc_zero_size(outdata, len);
1960         CTDB_NO_MEMORY(ctdb, ips);
1961
1962         outdata->dsize = len;
1963         outdata->dptr  = (uint8_t *)ips;
1964
1965         ips->num = num;
1966         i = 0;
1967         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1968                 if (vnn->public_address.sa.sa_family != AF_INET) {
1969                         continue;
1970                 }
1971                 ips->ips[i].pnn = vnn->pnn;
1972                 ips->ips[i].sin = vnn->public_address.ip;
1973                 i++;
1974         }
1975
1976         return 0;
1977 }
1978
1979 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1980                                         struct ctdb_req_control *c,
1981                                         TDB_DATA indata,
1982                                         TDB_DATA *outdata)
1983 {
1984         int i, num, len;
1985         ctdb_sock_addr *addr;
1986         struct ctdb_control_public_ip_info *info;
1987         struct ctdb_vnn *vnn;
1988
1989         addr = (ctdb_sock_addr *)indata.dptr;
1990
1991         vnn = find_public_ip_vnn(ctdb, addr);
1992         if (vnn == NULL) {
1993                 /* if it is not a public ip   it could be our 'single ip' */
1994                 if (ctdb->single_ip_vnn) {
1995                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
1996                                 vnn = ctdb->single_ip_vnn;
1997                         }
1998                 }
1999         }
2000         if (vnn == NULL) {
2001                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2002                                  "'%s'not a public address\n",
2003                                  ctdb_addr_to_str(addr)));
2004                 return -1;
2005         }
2006
2007         /* count how many public ip structures we have */
2008         num = 0;
2009         for (;vnn->ifaces[num];) {
2010                 num++;
2011         }
2012
2013         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2014                 num*sizeof(struct ctdb_control_iface_info);
2015         info = talloc_zero_size(outdata, len);
2016         CTDB_NO_MEMORY(ctdb, info);
2017
2018         info->ip.addr = vnn->public_address;
2019         info->ip.pnn = vnn->pnn;
2020         info->active_idx = 0xFFFFFFFF;
2021
2022         for (i=0; vnn->ifaces[i]; i++) {
2023                 struct ctdb_iface *cur;
2024
2025                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2026                 if (cur == NULL) {
2027                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2028                                            vnn->ifaces[i]));
2029                         return -1;
2030                 }
2031                 if (vnn->iface == cur) {
2032                         info->active_idx = i;
2033                 }
2034                 strcpy(info->ifaces[i].name, cur->name);
2035                 info->ifaces[i].link_state = cur->link_up;
2036                 info->ifaces[i].references = cur->references;
2037         }
2038         info->num = i;
2039         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2040                 i*sizeof(struct ctdb_control_iface_info);
2041
2042         outdata->dsize = len;
2043         outdata->dptr  = (uint8_t *)info;
2044
2045         return 0;
2046 }
2047
2048 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2049                                 struct ctdb_req_control *c,
2050                                 TDB_DATA *outdata)
2051 {
2052         int i, num, len;
2053         struct ctdb_control_get_ifaces *ifaces;
2054         struct ctdb_iface *cur;
2055
2056         /* count how many public ip structures we have */
2057         num = 0;
2058         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2059                 num++;
2060         }
2061
2062         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2063                 num*sizeof(struct ctdb_control_iface_info);
2064         ifaces = talloc_zero_size(outdata, len);
2065         CTDB_NO_MEMORY(ctdb, ifaces);
2066
2067         i = 0;
2068         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2069                 strcpy(ifaces->ifaces[i].name, cur->name);
2070                 ifaces->ifaces[i].link_state = cur->link_up;
2071                 ifaces->ifaces[i].references = cur->references;
2072                 i++;
2073         }
2074         ifaces->num = i;
2075         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2076                 i*sizeof(struct ctdb_control_iface_info);
2077
2078         outdata->dsize = len;
2079         outdata->dptr  = (uint8_t *)ifaces;
2080
2081         return 0;
2082 }
2083
2084 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2085                                     struct ctdb_req_control *c,
2086                                     TDB_DATA indata)
2087 {
2088         struct ctdb_control_iface_info *info;
2089         struct ctdb_iface *iface;
2090         bool link_up = false;
2091
2092         info = (struct ctdb_control_iface_info *)indata.dptr;
2093
2094         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2095                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2096                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2097                                   len, len, info->name));
2098                 return -1;
2099         }
2100
2101         switch (info->link_state) {
2102         case 0:
2103                 link_up = false;
2104                 break;
2105         case 1:
2106                 link_up = true;
2107                 break;
2108         default:
2109                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2110                                   (unsigned int)info->link_state));
2111                 return -1;
2112         }
2113
2114         if (info->references != 0) {
2115                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2116                                   (unsigned int)info->references));
2117                 return -1;
2118         }
2119
2120         iface = ctdb_find_iface(ctdb, info->name);
2121         if (iface == NULL) {
2122                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2123                                   info->name));
2124                 return -1;
2125         }
2126
2127         if (link_up == iface->link_up) {
2128                 return 0;
2129         }
2130
2131         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2132               ("iface[%s] has changed it's link status %s => %s\n",
2133                iface->name,
2134                iface->link_up?"up":"down",
2135                link_up?"up":"down"));
2136
2137         iface->link_up = link_up;
2138         return 0;
2139 }
2140
2141
2142 /* 
2143    structure containing the listening socket and the list of tcp connections
2144    that the ctdb daemon is to kill
2145 */
2146 struct ctdb_kill_tcp {
2147         struct ctdb_vnn *vnn;
2148         struct ctdb_context *ctdb;
2149         int capture_fd;
2150         struct fd_event *fde;
2151         trbt_tree_t *connections;
2152         void *private_data;
2153 };
2154
2155 /*
2156   a tcp connection that is to be killed
2157  */
2158 struct ctdb_killtcp_con {
2159         ctdb_sock_addr src_addr;
2160         ctdb_sock_addr dst_addr;
2161         int count;
2162         struct ctdb_kill_tcp *killtcp;
2163 };
2164
2165 /* this function is used to create a key to represent this socketpair
2166    in the killtcp tree.
2167    this key is used to insert and lookup matching socketpairs that are
2168    to be tickled and RST
2169 */
2170 #define KILLTCP_KEYLEN  10
2171 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2172 {
2173         static uint32_t key[KILLTCP_KEYLEN];
2174
2175         bzero(key, sizeof(key));
2176
2177         if (src->sa.sa_family != dst->sa.sa_family) {
2178                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2179                 return key;
2180         }
2181         
2182         switch (src->sa.sa_family) {
2183         case AF_INET:
2184                 key[0]  = dst->ip.sin_addr.s_addr;
2185                 key[1]  = src->ip.sin_addr.s_addr;
2186                 key[2]  = dst->ip.sin_port;
2187                 key[3]  = src->ip.sin_port;
2188                 break;
2189         case AF_INET6:
2190                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2191                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2192                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2193                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2194                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2195                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2196                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2197                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2198                 key[8]  = dst->ip6.sin6_port;
2199                 key[9]  = src->ip6.sin6_port;
2200                 break;
2201         default:
2202                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2203                 return key;
2204         }
2205
2206         return key;
2207 }
2208
2209 /*
2210   called when we get a read event on the raw socket
2211  */
2212 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2213                                 uint16_t flags, void *private_data)
2214 {
2215         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2216         struct ctdb_killtcp_con *con;
2217         ctdb_sock_addr src, dst;
2218         uint32_t ack_seq, seq;
2219
2220         if (!(flags & EVENT_FD_READ)) {
2221                 return;
2222         }
2223
2224         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2225                                 killtcp->private_data,
2226                                 &src, &dst,
2227                                 &ack_seq, &seq) != 0) {
2228                 /* probably a non-tcp ACK packet */
2229                 return;
2230         }
2231
2232         /* check if we have this guy in our list of connections
2233            to kill
2234         */
2235         con = trbt_lookuparray32(killtcp->connections, 
2236                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2237         if (con == NULL) {
2238                 /* no this was some other packet we can just ignore */
2239                 return;
2240         }
2241
2242         /* This one has been tickled !
2243            now reset him and remove him from the list.
2244          */
2245         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2246                 ntohs(con->dst_addr.ip.sin_port),
2247                 ctdb_addr_to_str(&con->src_addr),
2248                 ntohs(con->src_addr.ip.sin_port)));
2249
2250         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2251         talloc_free(con);
2252 }
2253
2254
2255 /* when traversing the list of all tcp connections to send tickle acks to
2256    (so that we can capture the ack coming back and kill the connection
2257     by a RST)
2258    this callback is called for each connection we are currently trying to kill
2259 */
2260 static void tickle_connection_traverse(void *param, void *data)
2261 {
2262         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2263
2264         /* have tried too many times, just give up */
2265         if (con->count >= 5) {
2266                 talloc_free(con);
2267                 return;
2268         }
2269
2270         /* othervise, try tickling it again */
2271         con->count++;
2272         ctdb_sys_send_tcp(
2273                 (ctdb_sock_addr *)&con->dst_addr,
2274                 (ctdb_sock_addr *)&con->src_addr,
2275                 0, 0, 0);
2276 }
2277
2278
2279 /* 
2280    called every second until all sentenced connections have been reset
2281  */
2282 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2283                                               struct timeval t, void *private_data)
2284 {
2285         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2286
2287
2288         /* loop over all connections sending tickle ACKs */
2289         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
2290
2291
2292         /* If there are no more connections to kill we can remove the
2293            entire killtcp structure
2294          */
2295         if ( (killtcp->connections == NULL) || 
2296              (killtcp->connections->root == NULL) ) {
2297                 talloc_free(killtcp);
2298                 return;
2299         }
2300
2301         /* try tickling them again in a seconds time
2302          */
2303         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2304                         ctdb_tickle_sentenced_connections, killtcp);
2305 }
2306
2307 /*
2308   destroy the killtcp structure
2309  */
2310 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2311 {
2312         killtcp->vnn->killtcp = NULL;
2313         return 0;
2314 }
2315
2316
/* insert callback for the killtcp tree: nothing fancy here, the new
   connection structure (parm) unconditionally replaces whatever was
   stored before (data).

   dont even free the old one if it did exist, that one is talloc_stolen
   by the same node in the tree anyway and will be deleted when the new data 
   is deleted
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
2328
2329 /*
2330   add a tcp socket to the list of connections we want to RST
2331  */
2332 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2333                                        ctdb_sock_addr *s,
2334                                        ctdb_sock_addr *d)
2335 {
2336         ctdb_sock_addr src, dst;
2337         struct ctdb_kill_tcp *killtcp;
2338         struct ctdb_killtcp_con *con;
2339         struct ctdb_vnn *vnn;
2340
2341         ctdb_canonicalize_ip(s, &src);
2342         ctdb_canonicalize_ip(d, &dst);
2343
2344         vnn = find_public_ip_vnn(ctdb, &dst);
2345         if (vnn == NULL) {
2346                 vnn = find_public_ip_vnn(ctdb, &src);
2347         }
2348         if (vnn == NULL) {
2349                 /* if it is not a public ip   it could be our 'single ip' */
2350                 if (ctdb->single_ip_vnn) {
2351                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2352                                 vnn = ctdb->single_ip_vnn;
2353                         }
2354                 }
2355         }
2356         if (vnn == NULL) {
2357                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2358                 return -1;
2359         }
2360
2361         killtcp = vnn->killtcp;
2362         
2363         /* If this is the first connection to kill we must allocate
2364            a new structure
2365          */
2366         if (killtcp == NULL) {
2367                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2368                 CTDB_NO_MEMORY(ctdb, killtcp);
2369
2370                 killtcp->vnn         = vnn;
2371                 killtcp->ctdb        = ctdb;
2372                 killtcp->capture_fd  = -1;
2373                 killtcp->connections = trbt_create(killtcp, 0);
2374
2375                 vnn->killtcp         = killtcp;
2376                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2377         }
2378
2379
2380
2381         /* create a structure that describes this connection we want to
2382            RST and store it in killtcp->connections
2383         */
2384         con = talloc(killtcp, struct ctdb_killtcp_con);
2385         CTDB_NO_MEMORY(ctdb, con);
2386         con->src_addr = src;
2387         con->dst_addr = dst;
2388         con->count    = 0;
2389         con->killtcp  = killtcp;
2390
2391
2392         trbt_insertarray32_callback(killtcp->connections,
2393                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2394                         add_killtcp_callback, con);
2395
2396         /* 
2397            If we dont have a socket to listen on yet we must create it
2398          */
2399         if (killtcp->capture_fd == -1) {
2400                 const char *iface = ctdb_vnn_iface_string(vnn);
2401                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2402                 if (killtcp->capture_fd == -1) {
2403                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2404                                           "socket on iface '%s' for killtcp (%s)\n",
2405                                           iface, strerror(errno)));
2406                         goto failed;
2407                 }
2408         }
2409
2410
2411         if (killtcp->fde == NULL) {
2412                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2413                                             EVENT_FD_READ | EVENT_FD_AUTOCLOSE, 
2414                                             capture_tcp_handler, killtcp);
2415
2416                 /* We also need to set up some events to tickle all these connections
2417                    until they are all reset
2418                 */
2419                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2420                                 ctdb_tickle_sentenced_connections, killtcp);
2421         }
2422
2423         /* tickle him once now */
2424         ctdb_sys_send_tcp(
2425                 &con->dst_addr,
2426                 &con->src_addr,
2427                 0, 0, 0);
2428
2429         return 0;
2430
2431 failed:
2432         talloc_free(vnn->killtcp);
2433         vnn->killtcp = NULL;
2434         return -1;
2435 }
2436
2437 /*
2438   kill a TCP connection.
2439  */
2440 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2441 {
2442         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2443
2444         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2445 }
2446
2447 /*
2448   called by a daemon to inform us of the entire list of TCP tickles for
2449   a particular public address.
2450   this control should only be sent by the node that is currently serving
2451   that public address.
2452  */
2453 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2454 {
2455         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2456         struct ctdb_tcp_array *tcparray;
2457         struct ctdb_vnn *vnn;
2458
2459         /* We must at least have tickles.num or else we cant verify the size
2460            of the received data blob
2461          */
2462         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2463                                         tickles.connections)) {
2464                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2465                 return -1;
2466         }
2467
2468         /* verify that the size of data matches what we expect */
2469         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2470                                 tickles.connections)
2471                          + sizeof(struct ctdb_tcp_connection)
2472                                  * list->tickles.num) {
2473                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2474                 return -1;
2475         }       
2476
2477         vnn = find_public_ip_vnn(ctdb, &list->addr);
2478         if (vnn == NULL) {
2479                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2480                         ctdb_addr_to_str(&list->addr)));
2481
2482                 return 1;
2483         }
2484
2485         /* remove any old ticklelist we might have */
2486         talloc_free(vnn->tcp_array);
2487         vnn->tcp_array = NULL;
2488
2489         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2490         CTDB_NO_MEMORY(ctdb, tcparray);
2491
2492         tcparray->num = list->tickles.num;
2493
2494         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2495         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2496
2497         memcpy(tcparray->connections, &list->tickles.connections[0], 
2498                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2499
2500         /* We now have a new fresh tickle list array for this vnn */
2501         vnn->tcp_array = talloc_steal(vnn, tcparray);
2502         
2503         return 0;
2504 }
2505
2506 /*
2507   called to return the full list of tickles for the puclic address associated 
2508   with the provided vnn
2509  */
2510 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2511 {
2512         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2513         struct ctdb_control_tcp_tickle_list *list;
2514         struct ctdb_tcp_array *tcparray;
2515         int num;
2516         struct ctdb_vnn *vnn;
2517
2518         vnn = find_public_ip_vnn(ctdb, addr);
2519         if (vnn == NULL) {
2520                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2521                         ctdb_addr_to_str(addr)));
2522
2523                 return 1;
2524         }
2525
2526         tcparray = vnn->tcp_array;
2527         if (tcparray) {
2528                 num = tcparray->num;
2529         } else {
2530                 num = 0;
2531         }
2532
2533         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2534                                 tickles.connections)
2535                         + sizeof(struct ctdb_tcp_connection) * num;
2536
2537         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2538         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2539         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2540
2541         list->addr = *addr;
2542         list->tickles.num = num;
2543         if (num) {
2544                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2545                         sizeof(struct ctdb_tcp_connection) * num);
2546         }
2547
2548         return 0;
2549 }
2550
2551
2552 /*
2553   set the list of all tcp tickles for a public address
2554  */
2555 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2556                               struct timeval timeout, uint32_t destnode, 
2557                               ctdb_sock_addr *addr,
2558                               struct ctdb_tcp_array *tcparray)
2559 {
2560         int ret, num;
2561         TDB_DATA data;
2562         struct ctdb_control_tcp_tickle_list *list;
2563
2564         if (tcparray) {
2565                 num = tcparray->num;
2566         } else {
2567                 num = 0;
2568         }
2569
2570         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2571                                 tickles.connections) +
2572                         sizeof(struct ctdb_tcp_connection) * num;
2573         data.dptr = talloc_size(ctdb, data.dsize);
2574         CTDB_NO_MEMORY(ctdb, data.dptr);
2575
2576         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2577         list->addr = *addr;
2578         list->tickles.num = num;
2579         if (tcparray) {
2580                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2581         }
2582
2583         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2584                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2585                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2586         if (ret != 0) {
2587                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2588                 return -1;
2589         }
2590
2591         talloc_free(data.dptr);
2592
2593         return ret;
2594 }
2595
2596
2597 /*
2598   perform tickle updates if required
2599  */
2600 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2601                                 struct timed_event *te, 
2602                                 struct timeval t, void *private_data)
2603 {
2604         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2605         int ret;
2606         struct ctdb_vnn *vnn;
2607
2608         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2609                 /* we only send out updates for public addresses that 
2610                    we have taken over
2611                  */
2612                 if (ctdb->pnn != vnn->pnn) {
2613                         continue;
2614                 }
2615                 /* We only send out the updates if we need to */
2616                 if (!vnn->tcp_update_needed) {
2617                         continue;
2618                 }
2619                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2620                                 TAKEOVER_TIMEOUT(),
2621                                 CTDB_BROADCAST_CONNECTED,
2622                                 &vnn->public_address,
2623                                 vnn->tcp_array);
2624                 if (ret != 0) {
2625                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2626                                 ctdb_addr_to_str(&vnn->public_address)));
2627                 }
2628         }
2629
2630         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2631                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2632                              ctdb_update_tcp_tickles, ctdb);
2633 }               
2634         
2635
2636 /*
2637   start periodic update of tcp tickles
2638  */
2639 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2640 {
2641         ctdb->tickle_update_context = talloc_new(ctdb);
2642
2643         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2644                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2645                              ctdb_update_tcp_tickles, ctdb);
2646 }
2647
2648
2649
2650
/*
  state for one series of gratuitous ARPs requested through
  ctdb_control_send_gratious_arp() and sent by send_gratious_arp()
 */
struct control_gratious_arp {
        struct ctdb_context *ctdb;      /* daemon context; its event loop is used to reschedule the sender */
        ctdb_sock_addr addr;            /* address handed to ctdb_sys_send_arp() */
        const char *iface;              /* name of the interface to send on; a talloc copy owned by this struct */
        int count;                      /* number of sends so far; the struct is freed when it reaches CTDB_ARP_REPEAT */
};
2657
2658 /*
2659   send a control_gratuitous arp
2660  */
2661 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2662                                   struct timeval t, void *private_data)
2663 {
2664         int ret;
2665         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2666                                                         struct control_gratious_arp);
2667
2668         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2669         if (ret != 0) {
2670                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2671                                  arp->iface, strerror(errno)));
2672         }
2673
2674
2675         arp->count++;
2676         if (arp->count == CTDB_ARP_REPEAT) {
2677                 talloc_free(arp);
2678                 return;
2679         }
2680
2681         event_add_timed(arp->ctdb->ev, arp, 
2682                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2683                         send_gratious_arp, arp);
2684 }
2685
2686
2687 /*
2688   send a gratious arp 
2689  */
2690 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2691 {
2692         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2693         struct control_gratious_arp *arp;
2694
2695         /* verify the size of indata */
2696         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2697                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2698                                  (unsigned)indata.dsize, 
2699                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2700                 return -1;
2701         }
2702         if (indata.dsize != 
2703                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2704                 + gratious_arp->len ) ){
2705
2706                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2707                         "but should be %u bytes\n", 
2708                          (unsigned)indata.dsize, 
2709                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2710                 return -1;
2711         }
2712
2713
2714         arp = talloc(ctdb, struct control_gratious_arp);
2715         CTDB_NO_MEMORY(ctdb, arp);
2716
2717         arp->ctdb  = ctdb;
2718         arp->addr   = gratious_arp->addr;
2719         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2720         CTDB_NO_MEMORY(ctdb, arp->iface);
2721         arp->count = 0;
2722         
2723         event_add_timed(arp->ctdb->ev, arp, 
2724                         timeval_zero(), send_gratious_arp, arp);
2725
2726         return 0;
2727 }
2728
2729 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2730 {
2731         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2732         int ret;
2733
2734         /* verify the size of indata */
2735         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2736                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2737                 return -1;
2738         }
2739         if (indata.dsize != 
2740                 ( offsetof(struct ctdb_control_ip_iface, iface)
2741                 + pub->len ) ){
2742
2743                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2744                         "but should be %u bytes\n", 
2745                          (unsigned)indata.dsize, 
2746                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2747                 return -1;
2748         }
2749
2750         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2751
2752         if (ret != 0) {
2753                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2754                 return -1;
2755         }
2756
2757         return 0;
2758 }
2759
2760 /*
2761   called when releaseip event finishes for del_public_address
2762  */
2763 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2764                                 void *private_data)
2765 {
2766         talloc_free(private_data);
2767 }
2768
2769 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2770 {
2771         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2772         struct ctdb_vnn *vnn;
2773         int ret;
2774
2775         /* verify the size of indata */
2776         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2777                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2778                 return -1;
2779         }
2780         if (indata.dsize != 
2781                 ( offsetof(struct ctdb_control_ip_iface, iface)
2782                 + pub->len ) ){
2783
2784                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2785                         "but should be %u bytes\n", 
2786                          (unsigned)indata.dsize, 
2787                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2788                 return -1;
2789         }
2790
2791         /* walk over all public addresses until we find a match */
2792         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2793                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2794                         TALLOC_CTX *mem_ctx;
2795
2796                         DLIST_REMOVE(ctdb->vnn, vnn);
2797                         if (vnn->iface == NULL) {
2798                                 talloc_free(vnn);
2799                                 return 0;
2800                         }
2801
2802                         mem_ctx = talloc_new(ctdb);
2803                         ret = ctdb_event_script_callback(ctdb, 
2804                                          mem_ctx, delete_ip_callback, mem_ctx,
2805                                          false,
2806                                          CTDB_EVENT_RELEASE_IP,
2807                                          "%s %s %u",
2808                                          ctdb_vnn_iface_string(vnn),
2809                                          ctdb_addr_to_str(&vnn->public_address),
2810                                          vnn->public_netmask_bits);
2811                         ctdb_vnn_unassign_iface(ctdb, vnn);
2812                         talloc_free(vnn);
2813                         if (ret != 0) {
2814                                 return -1;
2815                         }
2816                         return 0;
2817                 }
2818         }
2819
2820         return -1;
2821 }
2822
2823 /* This function is called from the recovery daemon to verify that a remote
2824    node has the expected ip allocation.
2825    This is verified against ctdb->ip_tree
2826 */
2827 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2828 {
2829         struct ctdb_public_ip_list *tmp_ip; 
2830         int i;
2831
2832         if (ctdb->ip_tree == NULL) {
2833                 /* dont know the expected allocation yet, assume remote node
2834                    is correct. */
2835                 return 0;
2836         }
2837
2838         if (ips == NULL) {
2839                 return 0;
2840         }
2841
2842         for (i=0; i<ips->num; i++) {
2843                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2844                 if (tmp_ip == NULL) {
2845                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2846                         return -1;
2847                 }
2848
2849                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2850                         continue;
2851                 }
2852
2853                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2854                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2855                         return -1;
2856                 }
2857         }
2858
2859         return 0;
2860 }
2861
2862 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2863 {
2864         struct ctdb_public_ip_list *tmp_ip; 
2865
2866         if (ctdb->ip_tree == NULL) {
2867                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2868                 return -1;
2869         }
2870
2871         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2872         if (tmp_ip == NULL) {
2873                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2874                 return -1;
2875         }
2876
2877         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2878         tmp_ip->pnn = ip->pnn;
2879
2880         return 0;
2881 }