event: Update events to latest Samba version 0.9.8
[ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* deadline for a takeover operation: "now" offset by the
   takeover_timeout tunable; expands to a use of a variable named
   ctdb, which must be in scope wherever the macro is used */
#define TAKEOVER_TIMEOUT() \
	timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* first argument of the timeval_current_ofs() delay used when
   ctdb_control_send_arp() re-arms itself (presumably seconds) */
#define CTDB_ARP_INTERVAL	1

/* ctdb_control_send_arp() stops re-arming itself once it has run
   this many times */
#define CTDB_ARP_REPEAT		3
35
/*
  one network interface known to this node, kept on the ctdb->ifaces list
 */
struct ctdb_iface {
	/* list linkage used by the DLIST macros */
	struct ctdb_iface *prev;
	struct ctdb_iface *next;
	/* interface name, duplicated as a talloc child of this structure */
	const char *name;
	/* link state; false when the entry is first created */
	bool link_up;
	/* number of public addresses currently assigned to this interface */
	uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state handed to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
	/* the control request to reply to once the event has finished */
	struct ctdb_req_control *c;
	/* the public address entry being taken over */
	struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334
335         if (status != 0) {
336                 if (status == -ETIME) {
337                         ctdb_ban_self(ctdb);
338                 }
339                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
340                                  ctdb_addr_to_str(&state->vnn->public_address),
341                                  ctdb_vnn_iface_string(state->vnn)));
342                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
343                 talloc_free(state);
344                 return;
345         }
346
347         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
348         if (ret != 0) {
349                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
350                 talloc_free(state);
351                 return;
352         }
353
354         /* the control succeeded */
355         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
356         talloc_free(state);
357         return;
358 }
359
360 /*
361   take over an ip address
362  */
363 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
364                               struct ctdb_req_control *c,
365                               struct ctdb_vnn *vnn)
366 {
367         int ret;
368         struct ctdb_do_takeip_state *state;
369
370         ret = ctdb_vnn_assign_iface(ctdb, vnn);
371         if (ret != 0) {
372                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
373                                  "assin a usable interface\n",
374                                  ctdb_addr_to_str(&vnn->public_address),
375                                  vnn->public_netmask_bits));
376                 return -1;
377         }
378
379         state = talloc(vnn, struct ctdb_do_takeip_state);
380         CTDB_NO_MEMORY(ctdb, state);
381
382         state->c = talloc_steal(ctdb, c);
383         state->vnn   = vnn;
384
385         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
386                             ctdb_addr_to_str(&vnn->public_address),
387                             vnn->public_netmask_bits,
388                             ctdb_vnn_iface_string(vnn)));
389
390         ret = ctdb_event_script_callback(ctdb,
391                                          state,
392                                          ctdb_do_takeip_callback,
393                                          state,
394                                          false,
395                                          CTDB_EVENT_TAKE_IP,
396                                          "%s %s %u",
397                                          ctdb_vnn_iface_string(vnn),
398                                          ctdb_addr_to_str(&vnn->public_address),
399                                          vnn->public_netmask_bits);
400
401         if (ret != 0) {
402                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
403                         ctdb_addr_to_str(&vnn->public_address),
404                         ctdb_vnn_iface_string(vnn)));
405                 talloc_free(state);
406                 return -1;
407         }
408
409         return 0;
410 }
411
/*
  state handed to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
	/* the control request to reply to once the event has finished */
	struct ctdb_req_control *c;
	/* the interface the address is being moved away from */
	struct ctdb_iface *old;
	/* the public address entry being moved */
	struct ctdb_vnn *vnn;
};
417
418 /*
419   called when updateip event finishes
420  */
421 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
422                                       void *private_data)
423 {
424         struct ctdb_do_updateip_state *state =
425                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
426         int32_t ret;
427
428         if (status != 0) {
429                 if (status == -ETIME) {
430                         ctdb_ban_self(ctdb);
431                 }
432                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
433                         ctdb_addr_to_str(&state->vnn->public_address),
434                         state->old->name,
435                         ctdb_vnn_iface_string(state->vnn)));
436
437                 /*
438                  * All we can do is reset the old interface
439                  * and let the next run fix it
440                  */
441                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
442                 state->vnn->iface = state->old;
443                 state->vnn->iface->references++;
444
445                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
446                 talloc_free(state);
447                 return;
448         }
449
450         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
451         if (ret != 0) {
452                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
453                 talloc_free(state);
454                 return;
455         }
456
457         /* the control succeeded */
458         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
459         talloc_free(state);
460         return;
461 }
462
463 /*
464   update (move) an ip address
465  */
466 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
467                                 struct ctdb_req_control *c,
468                                 struct ctdb_vnn *vnn)
469 {
470         int ret;
471         struct ctdb_do_updateip_state *state;
472         struct ctdb_iface *old = vnn->iface;
473
474         ctdb_vnn_unassign_iface(ctdb, vnn);
475         ret = ctdb_vnn_assign_iface(ctdb, vnn);
476         if (ret != 0) {
477                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
478                                  "assin a usable interface (old iface '%s')\n",
479                                  ctdb_addr_to_str(&vnn->public_address),
480                                  vnn->public_netmask_bits,
481                                  old->name));
482                 return -1;
483         }
484
485         if (vnn->iface == old) {
486                 DEBUG(DEBUG_ERR,("update of IP %s/%u trying to "
487                                  "assin a same interface '%s'\n",
488                                  ctdb_addr_to_str(&vnn->public_address),
489                                  vnn->public_netmask_bits,
490                                  old->name));
491                 return -1;
492         }
493
494         state = talloc(vnn, struct ctdb_do_updateip_state);
495         CTDB_NO_MEMORY(ctdb, state);
496
497         state->c = talloc_steal(ctdb, c);
498         state->old = old;
499         state->vnn = vnn;
500
501         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
502                             "interface %s to %s\n",
503                             ctdb_addr_to_str(&vnn->public_address),
504                             vnn->public_netmask_bits,
505                             old->name,
506                             ctdb_vnn_iface_string(vnn)));
507
508         ret = ctdb_event_script_callback(ctdb,
509                                          state,
510                                          ctdb_do_updateip_callback,
511                                          state,
512                                          false,
513                                          CTDB_EVENT_UPDATE_IP,
514                                          "%s %s %s %u",
515                                          state->old->name,
516                                          ctdb_vnn_iface_string(vnn),
517                                          ctdb_addr_to_str(&vnn->public_address),
518                                          vnn->public_netmask_bits);
519         if (ret != 0) {
520                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
521                                  ctdb_addr_to_str(&vnn->public_address),
522                                  old->name, ctdb_vnn_iface_string(vnn)));
523                 talloc_free(state);
524                 return -1;
525         }
526
527         return 0;
528 }
529
530 /*
531   Find the vnn of the node that has a public ip address
532   returns -1 if the address is not known as a public address
533  */
534 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
535 {
536         struct ctdb_vnn *vnn;
537
538         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
539                 if (ctdb_same_ip(&vnn->public_address, addr)) {
540                         return vnn;
541                 }
542         }
543
544         return NULL;
545 }
546
547 /*
548   take over an ip address
549  */
550 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
551                                  struct ctdb_req_control *c,
552                                  TDB_DATA indata,
553                                  bool *async_reply)
554 {
555         int ret;
556         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
557         struct ctdb_vnn *vnn;
558         bool have_ip = false;
559         bool do_updateip = false;
560         bool do_takeip = false;
561         struct ctdb_iface *best_iface = NULL;
562
563         if (pip->pnn != ctdb->pnn) {
564                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
565                                  "with pnn %d, but we're node %d\n",
566                                  ctdb_addr_to_str(&pip->addr),
567                                  pip->pnn, ctdb->pnn));
568                 return -1;
569         }
570
571         /* update out vnn list */
572         vnn = find_public_ip_vnn(ctdb, &pip->addr);
573         if (vnn == NULL) {
574                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
575                         ctdb_addr_to_str(&pip->addr)));
576                 return 0;
577         }
578
579         have_ip = ctdb_sys_have_ip(&pip->addr);
580         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
581         if (best_iface == NULL) {
582                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
583                                  "a usable interface (old %s, have_ip %d)\n",
584                                  ctdb_addr_to_str(&vnn->public_address),
585                                  vnn->public_netmask_bits,
586                                  ctdb_vnn_iface_string(vnn),
587                                  have_ip));
588                 return -1;
589         }
590
591         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
592                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
593                 have_ip = false;
594         }
595
596         if (vnn->iface == NULL && have_ip) {
597                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
598                                   "but we have no interface assigned, has someone manually configured it?"
599                                   "banning ourself\n",
600                                  ctdb_addr_to_str(&vnn->public_address)));
601                 ctdb_ban_self(ctdb);
602                 return -1;
603         }
604
605         if (vnn->pnn != ctdb->pnn && have_ip) {
606                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
607                                   "and we have it on iface[%s], but it was assigned to node %d"
608                                   "and we are node %d, banning ourself\n",
609                                  ctdb_addr_to_str(&vnn->public_address),
610                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
611                 ctdb_ban_self(ctdb);
612                 return -1;
613         }
614
615         if (vnn->iface) {
616                 if (vnn->iface->link_up) {
617                         /* only move when the rebalance gains something */
618                         if (vnn->iface->references > (best_iface->references + 1)) {
619                                 do_updateip = true;
620                         }
621                 } else if (vnn->iface != best_iface) {
622                         do_updateip = true;
623                 }
624         }
625
626         if (!have_ip) {
627                 if (do_updateip) {
628                         ctdb_vnn_unassign_iface(ctdb, vnn);
629                         do_updateip = false;
630                 }
631                 do_takeip = true;
632         }
633
634         if (do_takeip) {
635                 ret = ctdb_do_takeip(ctdb, c, vnn);
636                 if (ret != 0) {
637                         return -1;
638                 }
639         } else if (do_updateip) {
640                 ret = ctdb_do_updateip(ctdb, c, vnn);
641                 if (ret != 0) {
642                         return -1;
643                 }
644         } else {
645                 /*
646                  * The interface is up and the kernel known the ip
647                  * => do nothing
648                  */
649                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
650                         ctdb_addr_to_str(&pip->addr),
651                         vnn->public_netmask_bits,
652                         ctdb_vnn_iface_string(vnn)));
653                 return 0;
654         }
655
656         /* tell ctdb_control.c that we will be replying asynchronously */
657         *async_reply = true;
658
659         return 0;
660 }
661
662 /*
663   takeover an ip address old v4 style
664  */
665 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
666                                 struct ctdb_req_control *c,
667                                 TDB_DATA indata, 
668                                 bool *async_reply)
669 {
670         TDB_DATA data;
671         
672         data.dsize = sizeof(struct ctdb_public_ip);
673         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
674         CTDB_NO_MEMORY(ctdb, data.dptr);
675         
676         memcpy(data.dptr, indata.dptr, indata.dsize);
677         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
678 }
679
680 /*
681   kill any clients that are registered with a IP that is being released
682  */
683 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
684 {
685         struct ctdb_client_ip *ip;
686
687         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
688                 ctdb_addr_to_str(addr)));
689
690         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
691                 ctdb_sock_addr tmp_addr;
692
693                 tmp_addr = ip->addr;
694                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
695                         ip->client_id,
696                         ctdb_addr_to_str(&ip->addr)));
697
698                 if (ctdb_same_ip(&tmp_addr, addr)) {
699                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
700                                                                      ip->client_id, 
701                                                                      struct ctdb_client);
702                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
703                                 ip->client_id,
704                                 ctdb_addr_to_str(&ip->addr),
705                                 client->pid));
706
707                         if (client->pid != 0) {
708                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
709                                         (unsigned)client->pid,
710                                         ctdb_addr_to_str(addr),
711                                         ip->client_id));
712                                 kill(client->pid, SIGKILL);
713                         }
714                 }
715         }
716 }
717
718 /*
719   called when releaseip event finishes
720  */
721 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
722                                 void *private_data)
723 {
724         struct takeover_callback_state *state = 
725                 talloc_get_type(private_data, struct takeover_callback_state);
726         TDB_DATA data;
727
728         if (status == -ETIME) {
729                 ctdb_ban_self(ctdb);
730         }
731
732         /* send a message to all clients of this node telling them
733            that the cluster has been reconfigured and they should
734            release any sockets on this IP */
735         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
736         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
737         data.dsize = strlen((char *)data.dptr)+1;
738
739         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
740
741         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
742
743         /* kill clients that have registered with this IP */
744         release_kill_clients(ctdb, state->addr);
745
746         ctdb_vnn_unassign_iface(ctdb, state->vnn);
747
748         /* the control succeeded */
749         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
750         talloc_free(state);
751 }
752
753 /*
754   release an ip address
755  */
756 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
757                                 struct ctdb_req_control *c,
758                                 TDB_DATA indata, 
759                                 bool *async_reply)
760 {
761         int ret;
762         struct takeover_callback_state *state;
763         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
764         struct ctdb_vnn *vnn;
765
766         /* update our vnn list */
767         vnn = find_public_ip_vnn(ctdb, &pip->addr);
768         if (vnn == NULL) {
769                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
770                         ctdb_addr_to_str(&pip->addr)));
771                 return 0;
772         }
773         vnn->pnn = pip->pnn;
774
775         /* stop any previous arps */
776         talloc_free(vnn->takeover_ctx);
777         vnn->takeover_ctx = NULL;
778
779         if (!ctdb_sys_have_ip(&pip->addr)) {
780                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
781                         ctdb_addr_to_str(&pip->addr),
782                         vnn->public_netmask_bits, 
783                         ctdb_vnn_iface_string(vnn)));
784                 ctdb_vnn_unassign_iface(ctdb, vnn);
785                 return 0;
786         }
787
788         if (vnn->iface == NULL) {
789                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
790                                   "but we have no interface assigned, has someone manually configured it?"
791                                   "banning ourself\n",
792                                  ctdb_addr_to_str(&vnn->public_address)));
793                 ctdb_ban_self(ctdb);
794                 return -1;
795         }
796
797         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
798                 ctdb_addr_to_str(&pip->addr),
799                 vnn->public_netmask_bits, 
800                 ctdb_vnn_iface_string(vnn),
801                 pip->pnn));
802
803         state = talloc(ctdb, struct takeover_callback_state);
804         CTDB_NO_MEMORY(ctdb, state);
805
806         state->c = talloc_steal(state, c);
807         state->addr = talloc(state, ctdb_sock_addr);       
808         CTDB_NO_MEMORY(ctdb, state->addr);
809         *state->addr = pip->addr;
810         state->vnn   = vnn;
811
812         ret = ctdb_event_script_callback(ctdb, 
813                                          state, release_ip_callback, state,
814                                          false,
815                                          CTDB_EVENT_RELEASE_IP,
816                                          "%s %s %u",
817                                          ctdb_vnn_iface_string(vnn),
818                                          ctdb_addr_to_str(&pip->addr),
819                                          vnn->public_netmask_bits);
820         if (ret != 0) {
821                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
822                         ctdb_addr_to_str(&pip->addr),
823                         ctdb_vnn_iface_string(vnn)));
824                 talloc_free(state);
825                 return -1;
826         }
827
828         /* tell the control that we will reply asynchronously */
829         *async_reply = true;
830         return 0;
831 }
832
833 /*
834   release an ip address old v4 style
835  */
836 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
837                                 struct ctdb_req_control *c,
838                                 TDB_DATA indata, 
839                                 bool *async_reply)
840 {
841         TDB_DATA data;
842         
843         data.dsize = sizeof(struct ctdb_public_ip);
844         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
845         CTDB_NO_MEMORY(ctdb, data.dptr);
846         
847         memcpy(data.dptr, indata.dptr, indata.dsize);
848         return ctdb_control_release_ip(ctdb, c, data, async_reply);
849 }
850
851
852 static int ctdb_add_public_address(struct ctdb_context *ctdb,
853                                    ctdb_sock_addr *addr,
854                                    unsigned mask, const char *ifaces)
855 {
856         struct ctdb_vnn      *vnn;
857         uint32_t num = 0;
858         char *tmp;
859         const char *iface;
860         int i;
861         int ret;
862
863         /* Verify that we dont have an entry for this ip yet */
864         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
865                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
866                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
867                                 ctdb_addr_to_str(addr)));
868                         return -1;
869                 }               
870         }
871
872         /* create a new vnn structure for this ip address */
873         vnn = talloc_zero(ctdb, struct ctdb_vnn);
874         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
875         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
876         tmp = talloc_strdup(vnn, ifaces);
877         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
878         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
879                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
880                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
881                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
882                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
883                 num++;
884         }
885         talloc_free(tmp);
886         vnn->ifaces[num] = NULL;
887         vnn->public_address      = *addr;
888         vnn->public_netmask_bits = mask;
889         vnn->pnn                 = -1;
890
891         for (i=0; vnn->ifaces[i]; i++) {
892                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
893                 if (ret != 0) {
894                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
895                                            "for public_address[%s]\n",
896                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
897                         talloc_free(vnn);
898                         return -1;
899                 }
900         }
901
902         DLIST_ADD(ctdb->vnn, vnn);
903
904         return 0;
905 }
906
907 /*
908   setup the event script directory
909 */
910 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
911 {
912         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
913         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
914         return 0;
915 }
916
917 /*
918   setup the public address lists from a file
919 */
920 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
921 {
922         char **lines;
923         int nlines;
924         int i;
925
926         lines = file_lines_load(alist, &nlines, ctdb);
927         if (lines == NULL) {
928                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
929                 return -1;
930         }
931         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
932                 nlines--;
933         }
934
935         for (i=0;i<nlines;i++) {
936                 unsigned mask;
937                 ctdb_sock_addr addr;
938                 const char *addrstr;
939                 const char *ifaces;
940                 char *tok, *line;
941
942                 line = lines[i];
943                 while ((*line == ' ') || (*line == '\t')) {
944                         line++;
945                 }
946                 if (*line == '#') {
947                         continue;
948                 }
949                 if (strcmp(line, "") == 0) {
950                         continue;
951                 }
952                 tok = strtok(line, " \t");
953                 addrstr = tok;
954                 tok = strtok(NULL, " \t");
955                 if (tok == NULL) {
956                         if (NULL == ctdb->default_public_interface) {
957                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
958                                          i+1));
959                                 talloc_free(lines);
960                                 return -1;
961                         }
962                         ifaces = ctdb->default_public_interface;
963                 } else {
964                         ifaces = tok;
965                 }
966
967                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
968                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
969                         talloc_free(lines);
970                         return -1;
971                 }
972                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
973                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
974                         talloc_free(lines);
975                         return -1;
976                 }
977         }
978
979         talloc_free(lines);
980         return 0;
981 }
982
983 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
984                               const char *iface,
985                               const char *ip)
986 {
987         struct ctdb_vnn *svnn;
988         bool ok;
989         int ret;
990
991         svnn = talloc_zero(ctdb, struct ctdb_vnn);
992         CTDB_NO_MEMORY(ctdb, svnn);
993
994         svnn->ifaces = talloc_array(svnn, const char *, 2);
995         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
996         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
997         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
998         svnn->ifaces[1] = NULL;
999
1000         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1001         if (!ok) {
1002                 talloc_free(svnn);
1003                 return -1;
1004         }
1005
1006         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1007         if (ret != 0) {
1008                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1009                                    "for single_ip[%s]\n",
1010                                    svnn->ifaces[0],
1011                                    ctdb_addr_to_str(&svnn->public_address)));
1012                 talloc_free(svnn);
1013                 return -1;
1014         }
1015
1016         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1017         if (ret != 0) {
1018                 talloc_free(svnn);
1019                 return -1;
1020         }
1021
1022         ctdb->single_ip_vnn = svnn;
1023         return 0;
1024 }
1025
1026 struct ctdb_public_ip_list {
1027         struct ctdb_public_ip_list *next;
1028         uint32_t pnn;
1029         ctdb_sock_addr addr;
1030 };
1031
1032
1033 /* Given a physical node, return the number of
1034    public addresses that is currently assigned to this node.
1035 */
1036 static int node_ip_coverage(struct ctdb_context *ctdb, 
1037         int32_t pnn,
1038         struct ctdb_public_ip_list *ips)
1039 {
1040         int num=0;
1041
1042         for (;ips;ips=ips->next) {
1043                 if (ips->pnn == pnn) {
1044                         num++;
1045                 }
1046         }
1047         return num;
1048 }
1049
1050
1051 /* Check if this is a public ip known to the node, i.e. can that
1052    node takeover this ip ?
1053 */
1054 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1055                 struct ctdb_public_ip_list *ip)
1056 {
1057         struct ctdb_all_public_ips *public_ips;
1058         int i;
1059
1060         public_ips = ctdb->nodes[pnn]->available_public_ips;
1061
1062         if (public_ips == NULL) {
1063                 return -1;
1064         }
1065
1066         for (i=0;i<public_ips->num;i++) {
1067                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1068                         /* yes, this node can serve this public ip */
1069                         return 0;
1070                 }
1071         }
1072
1073         return -1;
1074 }
1075
1076
1077 /* search the node lists list for a node to takeover this ip.
1078    pick the node that currently are serving the least number of ips
1079    so that the ips get spread out evenly.
1080 */
1081 static int find_takeover_node(struct ctdb_context *ctdb, 
1082                 struct ctdb_node_map *nodemap, uint32_t mask, 
1083                 struct ctdb_public_ip_list *ip,
1084                 struct ctdb_public_ip_list *all_ips)
1085 {
1086         int pnn, min=0, num;
1087         int i;
1088
1089         pnn    = -1;
1090         for (i=0;i<nodemap->num;i++) {
1091                 if (nodemap->nodes[i].flags & mask) {
1092                         /* This node is not healty and can not be used to serve
1093                            a public address 
1094                         */
1095                         continue;
1096                 }
1097
1098                 /* verify that this node can serve this ip */
1099                 if (can_node_serve_ip(ctdb, i, ip)) {
1100                         /* no it couldnt   so skip to the next node */
1101                         continue;
1102                 }
1103
1104                 num = node_ip_coverage(ctdb, i, all_ips);
1105                 /* was this the first node we checked ? */
1106                 if (pnn == -1) {
1107                         pnn = i;
1108                         min  = num;
1109                 } else {
1110                         if (num < min) {
1111                                 pnn = i;
1112                                 min  = num;
1113                         }
1114                 }
1115         }       
1116         if (pnn == -1) {
1117                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1118                         ctdb_addr_to_str(&ip->addr)));
1119
1120                 return -1;
1121         }
1122
1123         ip->pnn = pnn;
1124         return 0;
1125 }
1126
1127 #define IP_KEYLEN       4
1128 static uint32_t *ip_key(ctdb_sock_addr *ip)
1129 {
1130         static uint32_t key[IP_KEYLEN];
1131
1132         bzero(key, sizeof(key));
1133
1134         switch (ip->sa.sa_family) {
1135         case AF_INET:
1136                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1137                 break;
1138         case AF_INET6:
1139                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1140                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1141                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1142                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1143                 break;
1144         default:
1145                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1146                 return key;
1147         }
1148
1149         return key;
1150 }
1151
/*
  insert callback handed to trbt_insertarray32_callback(): returns
  'parm' unchanged and ignores 'data'
*/
static void *add_ip_callback(void *parm, void *data)
{
        (void)data;

        return parm;
}
1156
1157 void getips_count_callback(void *param, void *data)
1158 {
1159         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1160         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1161
1162         new_ip->next = *ip_list;
1163         *ip_list     = new_ip;
1164 }
1165
1166 static struct ctdb_public_ip_list *
1167 create_merged_ip_list(struct ctdb_context *ctdb)
1168 {
1169         int i, j;
1170         struct ctdb_public_ip_list *ip_list;
1171         struct ctdb_all_public_ips *public_ips;
1172
1173         if (ctdb->ip_tree != NULL) {
1174                 talloc_free(ctdb->ip_tree);
1175                 ctdb->ip_tree = NULL;
1176         }
1177         ctdb->ip_tree = trbt_create(ctdb, 0);
1178
1179         for (i=0;i<ctdb->num_nodes;i++) {
1180                 public_ips = ctdb->nodes[i]->known_public_ips;
1181
1182                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1183                         continue;
1184                 }
1185
1186                 /* there were no public ips for this node */
1187                 if (public_ips == NULL) {
1188                         continue;
1189                 }               
1190
1191                 for (j=0;j<public_ips->num;j++) {
1192                         struct ctdb_public_ip_list *tmp_ip; 
1193
1194                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1195                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1196                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1197                         tmp_ip->addr = public_ips->ips[j].addr;
1198                         tmp_ip->next = NULL;
1199
1200                         trbt_insertarray32_callback(ctdb->ip_tree,
1201                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1202                                 add_ip_callback,
1203                                 tmp_ip);
1204                 }
1205         }
1206
1207         ip_list = NULL;
1208         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1209
1210         return ip_list;
1211 }
1212
1213 /*
1214   make any IP alias changes for public addresses that are necessary 
1215  */
1216 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1217 {
1218         int i, num_healthy, retries;
1219         struct ctdb_public_ip ip;
1220         struct ctdb_public_ipv4 ipv4;
1221         uint32_t mask;
1222         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1223         int maxnode, maxnum=0, minnode, minnum=0, num;
1224         TDB_DATA data;
1225         struct timeval timeout;
1226         struct client_async_data *async_data;
1227         struct ctdb_client_control_state *state;
1228         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1229
1230
1231         ZERO_STRUCT(ip);
1232
1233         /* Count how many completely healthy nodes we have */
1234         num_healthy = 0;
1235         for (i=0;i<nodemap->num;i++) {
1236                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1237                         num_healthy++;
1238                 }
1239         }
1240
1241         if (num_healthy > 0) {
1242                 /* We have healthy nodes, so only consider them for 
1243                    serving public addresses
1244                 */
1245                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1246         } else {
1247                 /* We didnt have any completely healthy nodes so
1248                    use "disabled" nodes as a fallback
1249                 */
1250                 mask = NODE_FLAGS_INACTIVE;
1251         }
1252
1253         /* since nodes only know about those public addresses that
1254            can be served by that particular node, no single node has
1255            a full list of all public addresses that exist in the cluster.
1256            Walk over all node structures and create a merged list of
1257            all public addresses that exist in the cluster.
1258
1259            keep the tree of ips around as ctdb->ip_tree
1260         */
1261         all_ips = create_merged_ip_list(ctdb);
1262
1263         /* If we want deterministic ip allocations, i.e. that the ip addresses
1264            will always be allocated the same way for a specific set of
1265            available/unavailable nodes.
1266         */
1267         if (1 == ctdb->tunable.deterministic_public_ips) {              
1268                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1269                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1270                         tmp_ip->pnn = i%nodemap->num;
1271                 }
1272         }
1273
1274
1275         /* mark all public addresses with a masked node as being served by
1276            node -1
1277         */
1278         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1279                 if (tmp_ip->pnn == -1) {
1280                         continue;
1281                 }
1282                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1283                         tmp_ip->pnn = -1;
1284                 }
1285         }
1286
1287         /* verify that the assigned nodes can serve that public ip
1288            and set it to -1 if not
1289         */
1290         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1291                 if (tmp_ip->pnn == -1) {
1292                         continue;
1293                 }
1294                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1295                         /* this node can not serve this ip. */
1296                         tmp_ip->pnn = -1;
1297                 }
1298         }
1299
1300
1301         /* now we must redistribute all public addresses with takeover node
1302            -1 among the nodes available
1303         */
1304         retries = 0;
1305 try_again:
1306         /* loop over all ip's and find a physical node to cover for 
1307            each unassigned ip.
1308         */
1309         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1310                 if (tmp_ip->pnn == -1) {
1311                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1312                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1313                                         ctdb_addr_to_str(&tmp_ip->addr)));
1314                         }
1315                 }
1316         }
1317
1318         /* If we dont want ips to fail back after a node becomes healthy
1319            again, we wont even try to reallocat the ip addresses so that
1320            they are evenly spread out.
1321            This can NOT be used at the same time as DeterministicIPs !
1322         */
1323         if (1 == ctdb->tunable.no_ip_failback) {
1324                 if (1 == ctdb->tunable.deterministic_public_ips) {
1325                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1326                 }
1327                 goto finished;
1328         }
1329
1330
1331         /* now, try to make sure the ip adresses are evenly distributed
1332            across the node.
1333            for each ip address, loop over all nodes that can serve this
1334            ip and make sure that the difference between the node
1335            serving the most and the node serving the least ip's are not greater
1336            than 1.
1337         */
1338         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1339                 if (tmp_ip->pnn == -1) {
1340                         continue;
1341                 }
1342
1343                 /* Get the highest and lowest number of ips's served by any 
1344                    valid node which can serve this ip.
1345                 */
1346                 maxnode = -1;
1347                 minnode = -1;
1348                 for (i=0;i<nodemap->num;i++) {
1349                         if (nodemap->nodes[i].flags & mask) {
1350                                 continue;
1351                         }
1352
1353                         /* only check nodes that can actually serve this ip */
1354                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1355                                 /* no it couldnt   so skip to the next node */
1356                                 continue;
1357                         }
1358
1359                         num = node_ip_coverage(ctdb, i, all_ips);
1360                         if (maxnode == -1) {
1361                                 maxnode = i;
1362                                 maxnum  = num;
1363                         } else {
1364                                 if (num > maxnum) {
1365                                         maxnode = i;
1366                                         maxnum  = num;
1367                                 }
1368                         }
1369                         if (minnode == -1) {
1370                                 minnode = i;
1371                                 minnum  = num;
1372                         } else {
1373                                 if (num < minnum) {
1374                                         minnode = i;
1375                                         minnum  = num;
1376                                 }
1377                         }
1378                 }
1379                 if (maxnode == -1) {
1380                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1381                                 ctdb_addr_to_str(&tmp_ip->addr)));
1382
1383                         continue;
1384                 }
1385
1386                 /* If we want deterministic IPs then dont try to reallocate 
1387                    them to spread out the load.
1388                 */
1389                 if (1 == ctdb->tunable.deterministic_public_ips) {
1390                         continue;
1391                 }
1392
1393                 /* if the spread between the smallest and largest coverage by
1394                    a node is >=2 we steal one of the ips from the node with
1395                    most coverage to even things out a bit.
1396                    try to do this at most 5 times  since we dont want to spend
1397                    too much time balancing the ip coverage.
1398                 */
1399                 if ( (maxnum > minnum+1)
1400                   && (retries < 5) ){
1401                         struct ctdb_public_ip_list *tmp;
1402
1403                         /* mark one of maxnode's vnn's as unassigned and try
1404                            again
1405                         */
1406                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1407                                 if (tmp->pnn == maxnode) {
1408                                         tmp->pnn = -1;
1409                                         retries++;
1410                                         goto try_again;
1411                                 }
1412                         }
1413                 }
1414         }
1415
1416
1417         /* finished distributing the public addresses, now just send the 
1418            info out to the nodes
1419         */
1420 finished:
1421
1422         /* at this point ->pnn is the node which will own each IP
1423            or -1 if there is no node that can cover this ip
1424         */
1425
1426         /* now tell all nodes to delete any alias that they should not
1427            have.  This will be a NOOP on nodes that don't currently
1428            hold the given alias */
1429         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1430         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1431
1432         for (i=0;i<nodemap->num;i++) {
1433                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1434                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1435                         continue;
1436                 }
1437
1438                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1439                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1440                                 /* This node should be serving this
1441                                    vnn so dont tell it to release the ip
1442                                 */
1443                                 continue;
1444                         }
1445                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1446                                 ipv4.pnn = tmp_ip->pnn;
1447                                 ipv4.sin = tmp_ip->addr.ip;
1448
1449                                 timeout = TAKEOVER_TIMEOUT();
1450                                 data.dsize = sizeof(ipv4);
1451                                 data.dptr  = (uint8_t *)&ipv4;
1452                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1453                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1454                                                 data, async_data,
1455                                                 &timeout, NULL);
1456                         } else {
1457                                 ip.pnn  = tmp_ip->pnn;
1458                                 ip.addr = tmp_ip->addr;
1459
1460                                 timeout = TAKEOVER_TIMEOUT();
1461                                 data.dsize = sizeof(ip);
1462                                 data.dptr  = (uint8_t *)&ip;
1463                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1464                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1465                                                 data, async_data,
1466                                                 &timeout, NULL);
1467                         }
1468
1469                         if (state == NULL) {
1470                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1471                                 talloc_free(tmp_ctx);
1472                                 return -1;
1473                         }
1474                 
1475                         ctdb_client_async_add(async_data, state);
1476                 }
1477         }
1478         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1479                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1480                 talloc_free(tmp_ctx);
1481                 return -1;
1482         }
1483         talloc_free(async_data);
1484
1485
1486         /* tell all nodes to get their own IPs */
1487         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1488         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1489         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1490                 if (tmp_ip->pnn == -1) {
1491                         /* this IP won't be taken over */
1492                         continue;
1493                 }
1494
1495                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1496                         ipv4.pnn = tmp_ip->pnn;
1497                         ipv4.sin = tmp_ip->addr.ip;
1498
1499                         timeout = TAKEOVER_TIMEOUT();
1500                         data.dsize = sizeof(ipv4);
1501                         data.dptr  = (uint8_t *)&ipv4;
1502                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1503                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1504                                         data, async_data,
1505                                         &timeout, NULL);
1506                 } else {
1507                         ip.pnn  = tmp_ip->pnn;
1508                         ip.addr = tmp_ip->addr;
1509
1510                         timeout = TAKEOVER_TIMEOUT();
1511                         data.dsize = sizeof(ip);
1512                         data.dptr  = (uint8_t *)&ip;
1513                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1514                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1515                                         data, async_data,
1516                                         &timeout, NULL);
1517                 }
1518                 if (state == NULL) {
1519                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1520                         talloc_free(tmp_ctx);
1521                         return -1;
1522                 }
1523                 
1524                 ctdb_client_async_add(async_data, state);
1525         }
1526         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1527                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1528                 talloc_free(tmp_ctx);
1529                 return -1;
1530         }
1531
1532         talloc_free(tmp_ctx);
1533         return 0;
1534 }
1535
1536
1537 /*
1538   destroy a ctdb_client_ip structure
1539  */
1540 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1541 {
1542         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1543                 ctdb_addr_to_str(&ip->addr),
1544                 ntohs(ip->addr.ip.sin_port),
1545                 ip->client_id));
1546
1547         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1548         return 0;
1549 }
1550
1551 /*
1552   called by a client to inform us of a TCP connection that it is managing
1553   that should be tickled with an ACK when IP takeover is done.
1554   we handle both the old ipv4 style of packets as well as the new ipv4/6
1555   pdus.
1556  */
1557 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1558                                 TDB_DATA indata)
1559 {
1560         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1561         struct ctdb_control_tcp *old_addr = NULL;
1562         struct ctdb_control_tcp_addr new_addr;
1563         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1564         struct ctdb_tcp_list *tcp;
1565         struct ctdb_control_tcp_vnn t;
1566         int ret;
1567         TDB_DATA data;
1568         struct ctdb_client_ip *ip;
1569         struct ctdb_vnn *vnn;
1570         ctdb_sock_addr addr;
1571
1572         switch (indata.dsize) {
1573         case sizeof(struct ctdb_control_tcp):
1574                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1575                 ZERO_STRUCT(new_addr);
1576                 tcp_sock = &new_addr;
1577                 tcp_sock->src.ip  = old_addr->src;
1578                 tcp_sock->dest.ip = old_addr->dest;
1579                 break;
1580         case sizeof(struct ctdb_control_tcp_addr):
1581                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1582                 break;
1583         default:
1584                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1585                                  "to ctdb_control_tcp_client. size was %d but "
1586                                  "only allowed sizes are %lu and %lu\n",
1587                                  (int)indata.dsize,
1588                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1589                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1590                 return -1;
1591         }
1592
1593         addr = tcp_sock->src;
1594         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1595         addr = tcp_sock->dest;
1596         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1597
1598         ZERO_STRUCT(addr);
1599         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1600         vnn = find_public_ip_vnn(ctdb, &addr);
1601         if (vnn == NULL) {
1602                 switch (addr.sa.sa_family) {
1603                 case AF_INET:
1604                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1605                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1606                                         ctdb_addr_to_str(&addr)));
1607                         }
1608                         break;
1609                 case AF_INET6:
1610                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1611                                 ctdb_addr_to_str(&addr)));
1612                         break;
1613                 default:
1614                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1615                 }
1616
1617                 return 0;
1618         }
1619
1620         if (vnn->pnn != ctdb->pnn) {
1621                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1622                         ctdb_addr_to_str(&addr),
1623                         client_id, client->pid));
1624                 /* failing this call will tell smbd to die */
1625                 return -1;
1626         }
1627
1628         ip = talloc(client, struct ctdb_client_ip);
1629         CTDB_NO_MEMORY(ctdb, ip);
1630
1631         ip->ctdb      = ctdb;
1632         ip->addr      = addr;
1633         ip->client_id = client_id;
1634         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1635         DLIST_ADD(ctdb->client_ip_list, ip);
1636
1637         tcp = talloc(client, struct ctdb_tcp_list);
1638         CTDB_NO_MEMORY(ctdb, tcp);
1639
1640         tcp->connection.src_addr = tcp_sock->src;
1641         tcp->connection.dst_addr = tcp_sock->dest;
1642
1643         DLIST_ADD(client->tcp_list, tcp);
1644
1645         t.src  = tcp_sock->src;
1646         t.dest = tcp_sock->dest;
1647
1648         data.dptr = (uint8_t *)&t;
1649         data.dsize = sizeof(t);
1650
1651         switch (addr.sa.sa_family) {
1652         case AF_INET:
1653                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1654                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1655                         ctdb_addr_to_str(&tcp_sock->src),
1656                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1657                 break;
1658         case AF_INET6:
1659                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1660                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1661                         ctdb_addr_to_str(&tcp_sock->src),
1662                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1663                 break;
1664         default:
1665                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1666         }
1667
1668
1669         /* tell all nodes about this tcp connection */
1670         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1671                                        CTDB_CONTROL_TCP_ADD,
1672                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1673         if (ret != 0) {
1674                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1675                 return -1;
1676         }
1677
1678         return 0;
1679 }
1680
1681 /*
1682   find a tcp address on a list
1683  */
1684 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1685                                            struct ctdb_tcp_connection *tcp)
1686 {
1687         int i;
1688
1689         if (array == NULL) {
1690                 return NULL;
1691         }
1692
1693         for (i=0;i<array->num;i++) {
1694                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1695                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1696                         return &array->connections[i];
1697                 }
1698         }
1699         return NULL;
1700 }
1701
1702 /*
1703   called by a daemon to inform us of a TCP connection that one of its
1704   clients managing that should tickled with an ACK when IP takeover is
1705   done
1706  */
1707 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata)
1708 {
1709         struct ctdb_control_tcp_vnn *p = (struct ctdb_control_tcp_vnn *)indata.dptr;
1710         struct ctdb_tcp_array *tcparray;
1711         struct ctdb_tcp_connection tcp;
1712         struct ctdb_vnn *vnn;
1713
1714         vnn = find_public_ip_vnn(ctdb, &p->dest);
1715         if (vnn == NULL) {
1716                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1717                         ctdb_addr_to_str(&p->dest)));
1718
1719                 return -1;
1720         }
1721
1722
1723         tcparray = vnn->tcp_array;
1724
1725         /* If this is the first tickle */
1726         if (tcparray == NULL) {
1727                 tcparray = talloc_size(ctdb->nodes, 
1728                         offsetof(struct ctdb_tcp_array, connections) +
1729                         sizeof(struct ctdb_tcp_connection) * 1);
1730                 CTDB_NO_MEMORY(ctdb, tcparray);
1731                 vnn->tcp_array = tcparray;
1732
1733                 tcparray->num = 0;
1734                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1735                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1736
1737                 tcparray->connections[tcparray->num].src_addr = p->src;
1738                 tcparray->connections[tcparray->num].dst_addr = p->dest;
1739                 tcparray->num++;
1740                 return 0;
1741         }
1742
1743
1744         /* Do we already have this tickle ?*/
1745         tcp.src_addr = p->src;
1746         tcp.dst_addr = p->dest;
1747         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1748                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1749                         ctdb_addr_to_str(&tcp.dst_addr),
1750                         ntohs(tcp.dst_addr.ip.sin_port),
1751                         vnn->pnn));
1752                 return 0;
1753         }
1754
1755         /* A new tickle, we must add it to the array */
1756         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1757                                         struct ctdb_tcp_connection,
1758                                         tcparray->num+1);
1759         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1760
1761         vnn->tcp_array = tcparray;
1762         tcparray->connections[tcparray->num].src_addr = p->src;
1763         tcparray->connections[tcparray->num].dst_addr = p->dest;
1764         tcparray->num++;
1765                                 
1766         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1767                 ctdb_addr_to_str(&tcp.dst_addr),
1768                 ntohs(tcp.dst_addr.ip.sin_port),
1769                 vnn->pnn));
1770
1771         return 0;
1772 }
1773
1774
1775 /*
1776   called by a daemon to inform us of a TCP connection that one of its
1777   clients managing that should tickled with an ACK when IP takeover is
1778   done
1779  */
1780 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1781 {
1782         struct ctdb_tcp_connection *tcpp;
1783         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1784
1785         if (vnn == NULL) {
1786                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1787                         ctdb_addr_to_str(&conn->dst_addr)));
1788                 return;
1789         }
1790
1791         /* if the array is empty we cant remove it
1792            and we dont need to do anything
1793          */
1794         if (vnn->tcp_array == NULL) {
1795                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1796                         ctdb_addr_to_str(&conn->dst_addr),
1797                         ntohs(conn->dst_addr.ip.sin_port)));
1798                 return;
1799         }
1800
1801
1802         /* See if we know this connection
1803            if we dont know this connection  then we dont need to do anything
1804          */
1805         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1806         if (tcpp == NULL) {
1807                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1808                         ctdb_addr_to_str(&conn->dst_addr),
1809                         ntohs(conn->dst_addr.ip.sin_port)));
1810                 return;
1811         }
1812
1813
1814         /* We need to remove this entry from the array.
1815            Instead of allocating a new array and copying data to it
1816            we cheat and just copy the last entry in the existing array
1817            to the entry that is to be removed and just shring the 
1818            ->num field
1819          */
1820         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1821         vnn->tcp_array->num--;
1822
1823         /* If we deleted the last entry we also need to remove the entire array
1824          */
1825         if (vnn->tcp_array->num == 0) {
1826                 talloc_free(vnn->tcp_array);
1827                 vnn->tcp_array = NULL;
1828         }               
1829
1830         vnn->tcp_update_needed = true;
1831
1832         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1833                 ctdb_addr_to_str(&conn->src_addr),
1834                 ntohs(conn->src_addr.ip.sin_port)));
1835 }
1836
1837
/*
  called when a daemon restarts.  The intention is to send all tickles
  for all public addresses we are serving to the new node immediately;
  that is not implemented yet, so both arguments are ignored and 0 is
  always returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	/* XXX here we should send all tickles we are serving to the new node */
	(void)ctdb;
	(void)vnn;

	return 0;
}
1847
1848
1849 /*
1850   called when a client structure goes away - hook to remove
1851   elements from the tcp_list in all daemons
1852  */
1853 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1854 {
1855         while (client->tcp_list) {
1856                 struct ctdb_tcp_list *tcp = client->tcp_list;
1857                 DLIST_REMOVE(client->tcp_list, tcp);
1858                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1859         }
1860 }
1861
1862
1863 /*
1864   release all IPs on shutdown
1865  */
1866 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1867 {
1868         struct ctdb_vnn *vnn;
1869
1870         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1871                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1872                         ctdb_vnn_unassign_iface(ctdb, vnn);
1873                         continue;
1874                 }
1875                 if (!vnn->iface) {
1876                         continue;
1877                 }
1878                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1879                                   ctdb_vnn_iface_string(vnn),
1880                                   ctdb_addr_to_str(&vnn->public_address),
1881                                   vnn->public_netmask_bits);
1882                 release_kill_clients(ctdb, &vnn->public_address);
1883                 ctdb_vnn_unassign_iface(ctdb, vnn);
1884         }
1885 }
1886
1887
1888 /*
1889   get list of public IPs
1890  */
1891 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1892                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1893 {
1894         int i, num, len;
1895         struct ctdb_all_public_ips *ips;
1896         struct ctdb_vnn *vnn;
1897         bool only_available = false;
1898
1899         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1900                 only_available = true;
1901         }
1902
1903         /* count how many public ip structures we have */
1904         num = 0;
1905         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1906                 num++;
1907         }
1908
1909         len = offsetof(struct ctdb_all_public_ips, ips) + 
1910                 num*sizeof(struct ctdb_public_ip);
1911         ips = talloc_zero_size(outdata, len);
1912         CTDB_NO_MEMORY(ctdb, ips);
1913
1914         i = 0;
1915         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1916                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1917                         continue;
1918                 }
1919                 ips->ips[i].pnn  = vnn->pnn;
1920                 ips->ips[i].addr = vnn->public_address;
1921                 i++;
1922         }
1923         ips->num = i;
1924         len = offsetof(struct ctdb_all_public_ips, ips) +
1925                 i*sizeof(struct ctdb_public_ip);
1926
1927         outdata->dsize = len;
1928         outdata->dptr  = (uint8_t *)ips;
1929
1930         return 0;
1931 }
1932
1933
1934 /*
1935   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
1936  */
1937 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
1938                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1939 {
1940         int i, num, len;
1941         struct ctdb_all_public_ipsv4 *ips;
1942         struct ctdb_vnn *vnn;
1943
1944         /* count how many public ip structures we have */
1945         num = 0;
1946         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1947                 if (vnn->public_address.sa.sa_family != AF_INET) {
1948                         continue;
1949                 }
1950                 num++;
1951         }
1952
1953         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
1954                 num*sizeof(struct ctdb_public_ipv4);
1955         ips = talloc_zero_size(outdata, len);
1956         CTDB_NO_MEMORY(ctdb, ips);
1957
1958         outdata->dsize = len;
1959         outdata->dptr  = (uint8_t *)ips;
1960
1961         ips->num = num;
1962         i = 0;
1963         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1964                 if (vnn->public_address.sa.sa_family != AF_INET) {
1965                         continue;
1966                 }
1967                 ips->ips[i].pnn = vnn->pnn;
1968                 ips->ips[i].sin = vnn->public_address.ip;
1969                 i++;
1970         }
1971
1972         return 0;
1973 }
1974
1975 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
1976                                         struct ctdb_req_control *c,
1977                                         TDB_DATA indata,
1978                                         TDB_DATA *outdata)
1979 {
1980         int i, num, len;
1981         ctdb_sock_addr *addr;
1982         struct ctdb_control_public_ip_info *info;
1983         struct ctdb_vnn *vnn;
1984
1985         addr = (ctdb_sock_addr *)indata.dptr;
1986
1987         vnn = find_public_ip_vnn(ctdb, addr);
1988         if (vnn == NULL) {
1989                 /* if it is not a public ip   it could be our 'single ip' */
1990                 if (ctdb->single_ip_vnn) {
1991                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
1992                                 vnn = ctdb->single_ip_vnn;
1993                         }
1994                 }
1995         }
1996         if (vnn == NULL) {
1997                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
1998                                  "'%s'not a public address\n",
1999                                  ctdb_addr_to_str(addr)));
2000                 return -1;
2001         }
2002
2003         /* count how many public ip structures we have */
2004         num = 0;
2005         for (;vnn->ifaces[num];) {
2006                 num++;
2007         }
2008
2009         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2010                 num*sizeof(struct ctdb_control_iface_info);
2011         info = talloc_zero_size(outdata, len);
2012         CTDB_NO_MEMORY(ctdb, info);
2013
2014         info->ip.addr = vnn->public_address;
2015         info->ip.pnn = vnn->pnn;
2016         info->active_idx = 0xFFFFFFFF;
2017
2018         for (i=0; vnn->ifaces[i]; i++) {
2019                 struct ctdb_iface *cur;
2020
2021                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2022                 if (cur == NULL) {
2023                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2024                                            vnn->ifaces[i]));
2025                         return -1;
2026                 }
2027                 if (vnn->iface == cur) {
2028                         info->active_idx = i;
2029                 }
2030                 strcpy(info->ifaces[i].name, cur->name);
2031                 info->ifaces[i].link_state = cur->link_up;
2032                 info->ifaces[i].references = cur->references;
2033         }
2034         info->num = i;
2035         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2036                 i*sizeof(struct ctdb_control_iface_info);
2037
2038         outdata->dsize = len;
2039         outdata->dptr  = (uint8_t *)info;
2040
2041         return 0;
2042 }
2043
2044 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2045                                 struct ctdb_req_control *c,
2046                                 TDB_DATA *outdata)
2047 {
2048         int i, num, len;
2049         struct ctdb_control_get_ifaces *ifaces;
2050         struct ctdb_iface *cur;
2051
2052         /* count how many public ip structures we have */
2053         num = 0;
2054         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2055                 num++;
2056         }
2057
2058         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2059                 num*sizeof(struct ctdb_control_iface_info);
2060         ifaces = talloc_zero_size(outdata, len);
2061         CTDB_NO_MEMORY(ctdb, ifaces);
2062
2063         i = 0;
2064         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2065                 strcpy(ifaces->ifaces[i].name, cur->name);
2066                 ifaces->ifaces[i].link_state = cur->link_up;
2067                 ifaces->ifaces[i].references = cur->references;
2068                 i++;
2069         }
2070         ifaces->num = i;
2071         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2072                 i*sizeof(struct ctdb_control_iface_info);
2073
2074         outdata->dsize = len;
2075         outdata->dptr  = (uint8_t *)ifaces;
2076
2077         return 0;
2078 }
2079
2080 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2081                                     struct ctdb_req_control *c,
2082                                     TDB_DATA indata)
2083 {
2084         struct ctdb_control_iface_info *info;
2085         struct ctdb_iface *iface;
2086         bool link_up = false;
2087
2088         info = (struct ctdb_control_iface_info *)indata.dptr;
2089
2090         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2091                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2092                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2093                                   len, len, info->name));
2094                 return -1;
2095         }
2096
2097         switch (info->link_state) {
2098         case 0:
2099                 link_up = false;
2100                 break;
2101         case 1:
2102                 link_up = true;
2103                 break;
2104         default:
2105                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2106                                   (unsigned int)info->link_state));
2107                 return -1;
2108         }
2109
2110         if (info->references != 0) {
2111                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2112                                   (unsigned int)info->references));
2113                 return -1;
2114         }
2115
2116         iface = ctdb_find_iface(ctdb, info->name);
2117         if (iface == NULL) {
2118                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2119                                   info->name));
2120                 return -1;
2121         }
2122
2123         if (link_up == iface->link_up) {
2124                 return 0;
2125         }
2126
2127         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2128               ("iface[%s] has changed it's link status %s => %s\n",
2129                iface->name,
2130                iface->link_up?"up":"down",
2131                link_up?"up":"down"));
2132
2133         iface->link_up = link_up;
2134         return 0;
2135 }
2136
2137
2138 /* 
2139    structure containing the listening socket and the list of tcp connections
2140    that the ctdb daemon is to kill
2141 */
2142 struct ctdb_kill_tcp {
2143         struct ctdb_vnn *vnn;
2144         struct ctdb_context *ctdb;
2145         int capture_fd;
2146         struct fd_event *fde;
2147         trbt_tree_t *connections;
2148         void *private_data;
2149 };
2150
2151 /*
2152   a tcp connection that is to be killed
2153  */
2154 struct ctdb_killtcp_con {
2155         ctdb_sock_addr src_addr;
2156         ctdb_sock_addr dst_addr;
2157         int count;
2158         struct ctdb_kill_tcp *killtcp;
2159 };
2160
2161 /* this function is used to create a key to represent this socketpair
2162    in the killtcp tree.
2163    this key is used to insert and lookup matching socketpairs that are
2164    to be tickled and RST
2165 */
2166 #define KILLTCP_KEYLEN  10
2167 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2168 {
2169         static uint32_t key[KILLTCP_KEYLEN];
2170
2171         bzero(key, sizeof(key));
2172
2173         if (src->sa.sa_family != dst->sa.sa_family) {
2174                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2175                 return key;
2176         }
2177         
2178         switch (src->sa.sa_family) {
2179         case AF_INET:
2180                 key[0]  = dst->ip.sin_addr.s_addr;
2181                 key[1]  = src->ip.sin_addr.s_addr;
2182                 key[2]  = dst->ip.sin_port;
2183                 key[3]  = src->ip.sin_port;
2184                 break;
2185         case AF_INET6:
2186                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2187                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2188                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2189                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2190                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2191                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2192                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2193                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2194                 key[8]  = dst->ip6.sin6_port;
2195                 key[9]  = src->ip6.sin6_port;
2196                 break;
2197         default:
2198                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2199                 return key;
2200         }
2201
2202         return key;
2203 }
2204
2205 /*
2206   called when we get a read event on the raw socket
2207  */
2208 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2209                                 uint16_t flags, void *private_data)
2210 {
2211         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2212         struct ctdb_killtcp_con *con;
2213         ctdb_sock_addr src, dst;
2214         uint32_t ack_seq, seq;
2215
2216         if (!(flags & EVENT_FD_READ)) {
2217                 return;
2218         }
2219
2220         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2221                                 killtcp->private_data,
2222                                 &src, &dst,
2223                                 &ack_seq, &seq) != 0) {
2224                 /* probably a non-tcp ACK packet */
2225                 return;
2226         }
2227
2228         /* check if we have this guy in our list of connections
2229            to kill
2230         */
2231         con = trbt_lookuparray32(killtcp->connections, 
2232                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2233         if (con == NULL) {
2234                 /* no this was some other packet we can just ignore */
2235                 return;
2236         }
2237
2238         /* This one has been tickled !
2239            now reset him and remove him from the list.
2240          */
2241         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2242                 ntohs(con->dst_addr.ip.sin_port),
2243                 ctdb_addr_to_str(&con->src_addr),
2244                 ntohs(con->src_addr.ip.sin_port)));
2245
2246         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2247         talloc_free(con);
2248 }
2249
2250
2251 /* when traversing the list of all tcp connections to send tickle acks to
2252    (so that we can capture the ack coming back and kill the connection
2253     by a RST)
2254    this callback is called for each connection we are currently trying to kill
2255 */
2256 static void tickle_connection_traverse(void *param, void *data)
2257 {
2258         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2259
2260         /* have tried too many times, just give up */
2261         if (con->count >= 5) {
2262                 talloc_free(con);
2263                 return;
2264         }
2265
2266         /* othervise, try tickling it again */
2267         con->count++;
2268         ctdb_sys_send_tcp(
2269                 (ctdb_sock_addr *)&con->dst_addr,
2270                 (ctdb_sock_addr *)&con->src_addr,
2271                 0, 0, 0);
2272 }
2273
2274
2275 /* 
2276    called every second until all sentenced connections have been reset
2277  */
2278 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2279                                               struct timeval t, void *private_data)
2280 {
2281         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2282
2283
2284         /* loop over all connections sending tickle ACKs */
2285         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, NULL);
2286
2287
2288         /* If there are no more connections to kill we can remove the
2289            entire killtcp structure
2290          */
2291         if ( (killtcp->connections == NULL) || 
2292              (killtcp->connections->root == NULL) ) {
2293                 talloc_free(killtcp);
2294                 return;
2295         }
2296
2297         /* try tickling them again in a seconds time
2298          */
2299         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2300                         ctdb_tickle_sentenced_connections, killtcp);
2301 }
2302
2303 /*
2304   destroy the killtcp structure
2305  */
2306 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2307 {
2308         if (killtcp->vnn) {
2309                 killtcp->vnn->killtcp = NULL;
2310         }
2311         return 0;
2312 }
2313
2314
/* Insert callback for the killtcp tree: always answers with the new
   connection structure ('parm'), so that it unconditionally replaces
   whatever was stored before ('data').

   The old structure is deliberately not freed here; according to the
   original author it is talloc_stolen by the same node in the tree and
   is deleted together with the new data.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
2326
2327 /*
2328   add a tcp socket to the list of connections we want to RST
2329  */
2330 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2331                                        ctdb_sock_addr *s,
2332                                        ctdb_sock_addr *d)
2333 {
2334         ctdb_sock_addr src, dst;
2335         struct ctdb_kill_tcp *killtcp;
2336         struct ctdb_killtcp_con *con;
2337         struct ctdb_vnn *vnn;
2338
2339         ctdb_canonicalize_ip(s, &src);
2340         ctdb_canonicalize_ip(d, &dst);
2341
2342         vnn = find_public_ip_vnn(ctdb, &dst);
2343         if (vnn == NULL) {
2344                 vnn = find_public_ip_vnn(ctdb, &src);
2345         }
2346         if (vnn == NULL) {
2347                 /* if it is not a public ip   it could be our 'single ip' */
2348                 if (ctdb->single_ip_vnn) {
2349                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2350                                 vnn = ctdb->single_ip_vnn;
2351                         }
2352                 }
2353         }
2354         if (vnn == NULL) {
2355                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2356                 return -1;
2357         }
2358
2359         killtcp = vnn->killtcp;
2360         
2361         /* If this is the first connection to kill we must allocate
2362            a new structure
2363          */
2364         if (killtcp == NULL) {
2365                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2366                 CTDB_NO_MEMORY(ctdb, killtcp);
2367
2368                 killtcp->vnn         = vnn;
2369                 killtcp->ctdb        = ctdb;
2370                 killtcp->capture_fd  = -1;
2371                 killtcp->connections = trbt_create(killtcp, 0);
2372
2373                 vnn->killtcp         = killtcp;
2374                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2375         }
2376
2377
2378
2379         /* create a structure that describes this connection we want to
2380            RST and store it in killtcp->connections
2381         */
2382         con = talloc(killtcp, struct ctdb_killtcp_con);
2383         CTDB_NO_MEMORY(ctdb, con);
2384         con->src_addr = src;
2385         con->dst_addr = dst;
2386         con->count    = 0;
2387         con->killtcp  = killtcp;
2388
2389
2390         trbt_insertarray32_callback(killtcp->connections,
2391                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2392                         add_killtcp_callback, con);
2393
2394         /* 
2395            If we dont have a socket to listen on yet we must create it
2396          */
2397         if (killtcp->capture_fd == -1) {
2398                 const char *iface = ctdb_vnn_iface_string(vnn);
2399                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2400                 if (killtcp->capture_fd == -1) {
2401                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2402                                           "socket on iface '%s' for killtcp (%s)\n",
2403                                           iface, strerror(errno)));
2404                         goto failed;
2405                 }
2406         }
2407
2408
2409         if (killtcp->fde == NULL) {
2410                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2411                                             EVENT_FD_READ,
2412                                             capture_tcp_handler, killtcp);
2413                 tevent_fd_set_auto_close(killtcp->fde);
2414
2415                 /* We also need to set up some events to tickle all these connections
2416                    until they are all reset
2417                 */
2418                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2419                                 ctdb_tickle_sentenced_connections, killtcp);
2420         }
2421
2422         /* tickle him once now */
2423         ctdb_sys_send_tcp(
2424                 &con->dst_addr,
2425                 &con->src_addr,
2426                 0, 0, 0);
2427
2428         return 0;
2429
2430 failed:
2431         talloc_free(vnn->killtcp);
2432         vnn->killtcp = NULL;
2433         return -1;
2434 }
2435
2436 /*
2437   kill a TCP connection.
2438  */
2439 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2440 {
2441         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2442
2443         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2444 }
2445
2446 /*
2447   called by a daemon to inform us of the entire list of TCP tickles for
2448   a particular public address.
2449   this control should only be sent by the node that is currently serving
2450   that public address.
2451  */
2452 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2453 {
2454         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2455         struct ctdb_tcp_array *tcparray;
2456         struct ctdb_vnn *vnn;
2457
2458         /* We must at least have tickles.num or else we cant verify the size
2459            of the received data blob
2460          */
2461         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2462                                         tickles.connections)) {
2463                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2464                 return -1;
2465         }
2466
2467         /* verify that the size of data matches what we expect */
2468         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2469                                 tickles.connections)
2470                          + sizeof(struct ctdb_tcp_connection)
2471                                  * list->tickles.num) {
2472                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2473                 return -1;
2474         }       
2475
2476         vnn = find_public_ip_vnn(ctdb, &list->addr);
2477         if (vnn == NULL) {
2478                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2479                         ctdb_addr_to_str(&list->addr)));
2480
2481                 return 1;
2482         }
2483
2484         /* remove any old ticklelist we might have */
2485         talloc_free(vnn->tcp_array);
2486         vnn->tcp_array = NULL;
2487
2488         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2489         CTDB_NO_MEMORY(ctdb, tcparray);
2490
2491         tcparray->num = list->tickles.num;
2492
2493         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2494         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2495
2496         memcpy(tcparray->connections, &list->tickles.connections[0], 
2497                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2498
2499         /* We now have a new fresh tickle list array for this vnn */
2500         vnn->tcp_array = talloc_steal(vnn, tcparray);
2501         
2502         return 0;
2503 }
2504
2505 /*
2506   called to return the full list of tickles for the puclic address associated 
2507   with the provided vnn
2508  */
2509 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2510 {
2511         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2512         struct ctdb_control_tcp_tickle_list *list;
2513         struct ctdb_tcp_array *tcparray;
2514         int num;
2515         struct ctdb_vnn *vnn;
2516
2517         vnn = find_public_ip_vnn(ctdb, addr);
2518         if (vnn == NULL) {
2519                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2520                         ctdb_addr_to_str(addr)));
2521
2522                 return 1;
2523         }
2524
2525         tcparray = vnn->tcp_array;
2526         if (tcparray) {
2527                 num = tcparray->num;
2528         } else {
2529                 num = 0;
2530         }
2531
2532         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2533                                 tickles.connections)
2534                         + sizeof(struct ctdb_tcp_connection) * num;
2535
2536         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2537         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2538         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2539
2540         list->addr = *addr;
2541         list->tickles.num = num;
2542         if (num) {
2543                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2544                         sizeof(struct ctdb_tcp_connection) * num);
2545         }
2546
2547         return 0;
2548 }
2549
2550
2551 /*
2552   set the list of all tcp tickles for a public address
2553  */
2554 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2555                               struct timeval timeout, uint32_t destnode, 
2556                               ctdb_sock_addr *addr,
2557                               struct ctdb_tcp_array *tcparray)
2558 {
2559         int ret, num;
2560         TDB_DATA data;
2561         struct ctdb_control_tcp_tickle_list *list;
2562
2563         if (tcparray) {
2564                 num = tcparray->num;
2565         } else {
2566                 num = 0;
2567         }
2568
2569         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2570                                 tickles.connections) +
2571                         sizeof(struct ctdb_tcp_connection) * num;
2572         data.dptr = talloc_size(ctdb, data.dsize);
2573         CTDB_NO_MEMORY(ctdb, data.dptr);
2574
2575         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2576         list->addr = *addr;
2577         list->tickles.num = num;
2578         if (tcparray) {
2579                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2580         }
2581
2582         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2583                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2584                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2585         if (ret != 0) {
2586                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2587                 return -1;
2588         }
2589
2590         talloc_free(data.dptr);
2591
2592         return ret;
2593 }
2594
2595
2596 /*
2597   perform tickle updates if required
2598  */
2599 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2600                                 struct timed_event *te, 
2601                                 struct timeval t, void *private_data)
2602 {
2603         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2604         int ret;
2605         struct ctdb_vnn *vnn;
2606
2607         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2608                 /* we only send out updates for public addresses that 
2609                    we have taken over
2610                  */
2611                 if (ctdb->pnn != vnn->pnn) {
2612                         continue;
2613                 }
2614                 /* We only send out the updates if we need to */
2615                 if (!vnn->tcp_update_needed) {
2616                         continue;
2617                 }
2618                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2619                                 TAKEOVER_TIMEOUT(),
2620                                 CTDB_BROADCAST_CONNECTED,
2621                                 &vnn->public_address,
2622                                 vnn->tcp_array);
2623                 if (ret != 0) {
2624                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2625                                 ctdb_addr_to_str(&vnn->public_address)));
2626                 }
2627         }
2628
2629         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2630                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2631                              ctdb_update_tcp_tickles, ctdb);
2632 }               
2633         
2634
2635 /*
2636   start periodic update of tcp tickles
2637  */
2638 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2639 {
2640         ctdb->tickle_update_context = talloc_new(ctdb);
2641
2642         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2643                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2644                              ctdb_update_tcp_tickles, ctdb);
2645 }
2646
2647
2648
2649
2650 struct control_gratious_arp {
2651         struct ctdb_context *ctdb;
2652         ctdb_sock_addr addr;
2653         const char *iface;
2654         int count;
2655 };
2656
2657 /*
2658   send a control_gratuitous arp
2659  */
2660 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2661                                   struct timeval t, void *private_data)
2662 {
2663         int ret;
2664         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2665                                                         struct control_gratious_arp);
2666
2667         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2668         if (ret != 0) {
2669                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2670                                  arp->iface, strerror(errno)));
2671         }
2672
2673
2674         arp->count++;
2675         if (arp->count == CTDB_ARP_REPEAT) {
2676                 talloc_free(arp);
2677                 return;
2678         }
2679
2680         event_add_timed(arp->ctdb->ev, arp, 
2681                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2682                         send_gratious_arp, arp);
2683 }
2684
2685
2686 /*
2687   send a gratious arp 
2688  */
2689 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2690 {
2691         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2692         struct control_gratious_arp *arp;
2693
2694         /* verify the size of indata */
2695         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2696                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2697                                  (unsigned)indata.dsize, 
2698                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2699                 return -1;
2700         }
2701         if (indata.dsize != 
2702                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2703                 + gratious_arp->len ) ){
2704
2705                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2706                         "but should be %u bytes\n", 
2707                          (unsigned)indata.dsize, 
2708                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2709                 return -1;
2710         }
2711
2712
2713         arp = talloc(ctdb, struct control_gratious_arp);
2714         CTDB_NO_MEMORY(ctdb, arp);
2715
2716         arp->ctdb  = ctdb;
2717         arp->addr   = gratious_arp->addr;
2718         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2719         CTDB_NO_MEMORY(ctdb, arp->iface);
2720         arp->count = 0;
2721         
2722         event_add_timed(arp->ctdb->ev, arp, 
2723                         timeval_zero(), send_gratious_arp, arp);
2724
2725         return 0;
2726 }
2727
2728 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2729 {
2730         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2731         int ret;
2732
2733         /* verify the size of indata */
2734         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2735                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2736                 return -1;
2737         }
2738         if (indata.dsize != 
2739                 ( offsetof(struct ctdb_control_ip_iface, iface)
2740                 + pub->len ) ){
2741
2742                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2743                         "but should be %u bytes\n", 
2744                          (unsigned)indata.dsize, 
2745                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2746                 return -1;
2747         }
2748
2749         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2750
2751         if (ret != 0) {
2752                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2753                 return -1;
2754         }
2755
2756         return 0;
2757 }
2758
2759 /*
2760   called when releaseip event finishes for del_public_address
2761  */
2762 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2763                                 void *private_data)
2764 {
2765         talloc_free(private_data);
2766 }
2767
2768 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2769 {
2770         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2771         struct ctdb_vnn *vnn;
2772         int ret;
2773
2774         /* verify the size of indata */
2775         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2776                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2777                 return -1;
2778         }
2779         if (indata.dsize != 
2780                 ( offsetof(struct ctdb_control_ip_iface, iface)
2781                 + pub->len ) ){
2782
2783                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2784                         "but should be %u bytes\n", 
2785                          (unsigned)indata.dsize, 
2786                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2787                 return -1;
2788         }
2789
2790         /* walk over all public addresses until we find a match */
2791         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2792                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2793                         TALLOC_CTX *mem_ctx;
2794
2795                         DLIST_REMOVE(ctdb->vnn, vnn);
2796                         if (vnn->iface == NULL) {
2797                                 talloc_free(vnn);
2798                                 return 0;
2799                         }
2800
2801                         mem_ctx = talloc_new(ctdb);
2802                         ret = ctdb_event_script_callback(ctdb, 
2803                                          mem_ctx, delete_ip_callback, mem_ctx,
2804                                          false,
2805                                          CTDB_EVENT_RELEASE_IP,
2806                                          "%s %s %u",
2807                                          ctdb_vnn_iface_string(vnn),
2808                                          ctdb_addr_to_str(&vnn->public_address),
2809                                          vnn->public_netmask_bits);
2810                         ctdb_vnn_unassign_iface(ctdb, vnn);
2811                         talloc_free(vnn);
2812                         if (ret != 0) {
2813                                 return -1;
2814                         }
2815                         return 0;
2816                 }
2817         }
2818
2819         return -1;
2820 }
2821
2822 /* This function is called from the recovery daemon to verify that a remote
2823    node has the expected ip allocation.
2824    This is verified against ctdb->ip_tree
2825 */
2826 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2827 {
2828         struct ctdb_public_ip_list *tmp_ip; 
2829         int i;
2830
2831         if (ctdb->ip_tree == NULL) {
2832                 /* dont know the expected allocation yet, assume remote node
2833                    is correct. */
2834                 return 0;
2835         }
2836
2837         if (ips == NULL) {
2838                 return 0;
2839         }
2840
2841         for (i=0; i<ips->num; i++) {
2842                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2843                 if (tmp_ip == NULL) {
2844                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2845                         return -1;
2846                 }
2847
2848                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2849                         continue;
2850                 }
2851
2852                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2853                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2854                         return -1;
2855                 }
2856         }
2857
2858         return 0;
2859 }
2860
2861 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2862 {
2863         struct ctdb_public_ip_list *tmp_ip; 
2864
2865         if (ctdb->ip_tree == NULL) {
2866                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2867                 return -1;
2868         }
2869
2870         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2871         if (tmp_ip == NULL) {
2872                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2873                 return -1;
2874         }
2875
2876         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2877         tmp_ip->pnn = ip->pnn;
2878
2879         return 0;
2880 }