682d17ba1d06fa791322519d1c525bf278516ba2
[sahlberg/ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
/* timeout value built from the takeover_timeout tunable; expands to a
   timeval_current_ofs() call and needs a variable named 'ctdb' in scope */
#define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)

/* gratuitous arp announcements: seconds offset used when re-arming the
   timer, and the number of rounds sent before the announcement stops */
#define CTDB_ARP_INTERVAL 1
#define CTDB_ARP_REPEAT   3
35
/*
  one local network interface that public addresses can be placed on
 */
struct ctdb_iface {
        /* neighbours in the ctdb->ifaces list */
        struct ctdb_iface *prev;
        struct ctdb_iface *next;
        /* interface name; this is what lookups compare against */
        const char *name;
        /* only interfaces with this flag set are chosen for addresses */
        bool link_up;
        /* number of public addresses currently assigned to this interface */
        uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
197 struct ctdb_takeover_arp {
198         struct ctdb_context *ctdb;
199         uint32_t count;
200         ctdb_sock_addr addr;
201         struct ctdb_tcp_array *tcparray;
202         struct ctdb_vnn *vnn;
203 };
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
314 struct takeover_callback_state {
315         struct ctdb_req_control *c;
316         ctdb_sock_addr *addr;
317         struct ctdb_vnn *vnn;
318 };
319
/*
  state carried from ctdb_do_takeip() to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
        /* the control request that gets the reply */
        struct ctdb_req_control *c;
        /* the public address entry being taken over */
        struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334         TDB_DATA data;
335
336         if (status != 0) {
337                 if (status == -ETIME) {
338                         ctdb_ban_self(ctdb);
339                 }
340                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
341                                  ctdb_addr_to_str(&state->vnn->public_address),
342                                  ctdb_vnn_iface_string(state->vnn)));
343                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
344                 talloc_free(state);
345                 return;
346         }
347
348         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
349         if (ret != 0) {
350                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
351                 talloc_free(state);
352                 return;
353         }
354
355         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
356         data.dsize = strlen((char *)data.dptr) + 1;
357         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
358
359         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
360
361
362         /* the control succeeded */
363         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
364         talloc_free(state);
365         return;
366 }
367
368 /*
369   take over an ip address
370  */
371 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
372                               struct ctdb_req_control *c,
373                               struct ctdb_vnn *vnn)
374 {
375         int ret;
376         struct ctdb_do_takeip_state *state;
377
378         ret = ctdb_vnn_assign_iface(ctdb, vnn);
379         if (ret != 0) {
380                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
381                                  "assin a usable interface\n",
382                                  ctdb_addr_to_str(&vnn->public_address),
383                                  vnn->public_netmask_bits));
384                 return -1;
385         }
386
387         state = talloc(vnn, struct ctdb_do_takeip_state);
388         CTDB_NO_MEMORY(ctdb, state);
389
390         state->c = talloc_steal(ctdb, c);
391         state->vnn   = vnn;
392
393         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
394                             ctdb_addr_to_str(&vnn->public_address),
395                             vnn->public_netmask_bits,
396                             ctdb_vnn_iface_string(vnn)));
397
398         ret = ctdb_event_script_callback(ctdb,
399                                          state,
400                                          ctdb_do_takeip_callback,
401                                          state,
402                                          false,
403                                          CTDB_EVENT_TAKE_IP,
404                                          "%s %s %u",
405                                          ctdb_vnn_iface_string(vnn),
406                                          ctdb_addr_to_str(&vnn->public_address),
407                                          vnn->public_netmask_bits);
408
409         if (ret != 0) {
410                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
411                         ctdb_addr_to_str(&vnn->public_address),
412                         ctdb_vnn_iface_string(vnn)));
413                 talloc_free(state);
414                 return -1;
415         }
416
417         return 0;
418 }
419
/*
  state carried from ctdb_do_updateip() to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
        /* the control request that gets the reply */
        struct ctdb_req_control *c;
        /* interface the address was assigned to before the move */
        struct ctdb_iface *old;
        /* the public address entry being moved */
        struct ctdb_vnn *vnn;
};
425
426 /*
427   called when updateip event finishes
428  */
429 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
430                                       void *private_data)
431 {
432         struct ctdb_do_updateip_state *state =
433                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
434         int32_t ret;
435
436         if (status != 0) {
437                 if (status == -ETIME) {
438                         ctdb_ban_self(ctdb);
439                 }
440                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
441                         ctdb_addr_to_str(&state->vnn->public_address),
442                         state->old->name,
443                         ctdb_vnn_iface_string(state->vnn)));
444
445                 /*
446                  * All we can do is reset the old interface
447                  * and let the next run fix it
448                  */
449                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
450                 state->vnn->iface = state->old;
451                 state->vnn->iface->references++;
452
453                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
454                 talloc_free(state);
455                 return;
456         }
457
458         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
459         if (ret != 0) {
460                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
461                 talloc_free(state);
462                 return;
463         }
464
465         /* the control succeeded */
466         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
467         talloc_free(state);
468         return;
469 }
470
471 /*
472   update (move) an ip address
473  */
474 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
475                                 struct ctdb_req_control *c,
476                                 struct ctdb_vnn *vnn)
477 {
478         int ret;
479         struct ctdb_do_updateip_state *state;
480         struct ctdb_iface *old = vnn->iface;
481
482         ctdb_vnn_unassign_iface(ctdb, vnn);
483         ret = ctdb_vnn_assign_iface(ctdb, vnn);
484         if (ret != 0) {
485                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
486                                  "assin a usable interface (old iface '%s')\n",
487                                  ctdb_addr_to_str(&vnn->public_address),
488                                  vnn->public_netmask_bits,
489                                  old->name));
490                 return -1;
491         }
492
493         state = talloc(vnn, struct ctdb_do_updateip_state);
494         CTDB_NO_MEMORY(ctdb, state);
495
496         state->c = talloc_steal(ctdb, c);
497         state->old = old;
498         state->vnn = vnn;
499
500         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
501                             "interface %s to %s\n",
502                             ctdb_addr_to_str(&vnn->public_address),
503                             vnn->public_netmask_bits,
504                             old->name,
505                             ctdb_vnn_iface_string(vnn)));
506
507         ret = ctdb_event_script_callback(ctdb,
508                                          state,
509                                          ctdb_do_updateip_callback,
510                                          state,
511                                          false,
512                                          CTDB_EVENT_UPDATE_IP,
513                                          "%s %s %s %u",
514                                          state->old->name,
515                                          ctdb_vnn_iface_string(vnn),
516                                          ctdb_addr_to_str(&vnn->public_address),
517                                          vnn->public_netmask_bits);
518         if (ret != 0) {
519                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
520                                  ctdb_addr_to_str(&vnn->public_address),
521                                  old->name, ctdb_vnn_iface_string(vnn)));
522                 talloc_free(state);
523                 return -1;
524         }
525
526         return 0;
527 }
528
529 /*
530   Find the vnn of the node that has a public ip address
531   returns -1 if the address is not known as a public address
532  */
533 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
534 {
535         struct ctdb_vnn *vnn;
536
537         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
538                 if (ctdb_same_ip(&vnn->public_address, addr)) {
539                         return vnn;
540                 }
541         }
542
543         return NULL;
544 }
545
546 /*
547   take over an ip address
548  */
549 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
550                                  struct ctdb_req_control *c,
551                                  TDB_DATA indata,
552                                  bool *async_reply)
553 {
554         int ret;
555         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
556         struct ctdb_vnn *vnn;
557         bool have_ip = false;
558         bool do_updateip = false;
559         bool do_takeip = false;
560         struct ctdb_iface *best_iface = NULL;
561
562         if (pip->pnn != ctdb->pnn) {
563                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
564                                  "with pnn %d, but we're node %d\n",
565                                  ctdb_addr_to_str(&pip->addr),
566                                  pip->pnn, ctdb->pnn));
567                 return -1;
568         }
569
570         /* update out vnn list */
571         vnn = find_public_ip_vnn(ctdb, &pip->addr);
572         if (vnn == NULL) {
573                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
574                         ctdb_addr_to_str(&pip->addr)));
575                 return 0;
576         }
577
578         have_ip = ctdb_sys_have_ip(&pip->addr);
579         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
580         if (best_iface == NULL) {
581                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
582                                  "a usable interface (old %s, have_ip %d)\n",
583                                  ctdb_addr_to_str(&vnn->public_address),
584                                  vnn->public_netmask_bits,
585                                  ctdb_vnn_iface_string(vnn),
586                                  have_ip));
587                 return -1;
588         }
589
590         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
591                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
592                 have_ip = false;
593         }
594
595         if (vnn->iface == NULL && have_ip) {
596                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
597                                   "but we have no interface assigned, has someone manually configured it?"
598                                   "banning ourself\n",
599                                  ctdb_addr_to_str(&vnn->public_address)));
600                 ctdb_ban_self(ctdb);
601                 return -1;
602         }
603
604         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
605                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
606                                   "and we have it on iface[%s], but it was assigned to node %d"
607                                   "and we are node %d, banning ourself\n",
608                                  ctdb_addr_to_str(&vnn->public_address),
609                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
610                 ctdb_ban_self(ctdb);
611                 return -1;
612         }
613
614         if (vnn->pnn == -1 && have_ip) {
615                 vnn->pnn = ctdb->pnn;
616                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
617                                   "and we already have it on iface[%s], update local daemon\n",
618                                  ctdb_addr_to_str(&vnn->public_address),
619                                   ctdb_vnn_iface_string(vnn)));
620                 return 0;
621         }
622
623         if (vnn->iface) {
624                 if (vnn->iface->link_up) {
625                         /* only move when the rebalance gains something */
626                         if (vnn->iface->references > (best_iface->references + 1)) {
627                                 do_updateip = true;
628                         }
629                 } else if (vnn->iface != best_iface) {
630                         do_updateip = true;
631                 }
632         }
633
634         if (!have_ip) {
635                 if (do_updateip) {
636                         ctdb_vnn_unassign_iface(ctdb, vnn);
637                         do_updateip = false;
638                 }
639                 do_takeip = true;
640         }
641
642         if (do_takeip) {
643                 ret = ctdb_do_takeip(ctdb, c, vnn);
644                 if (ret != 0) {
645                         return -1;
646                 }
647         } else if (do_updateip) {
648                 ret = ctdb_do_updateip(ctdb, c, vnn);
649                 if (ret != 0) {
650                         return -1;
651                 }
652         } else {
653                 /*
654                  * The interface is up and the kernel known the ip
655                  * => do nothing
656                  */
657                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
658                         ctdb_addr_to_str(&pip->addr),
659                         vnn->public_netmask_bits,
660                         ctdb_vnn_iface_string(vnn)));
661                 return 0;
662         }
663
664         /* tell ctdb_control.c that we will be replying asynchronously */
665         *async_reply = true;
666
667         return 0;
668 }
669
670 /*
671   takeover an ip address old v4 style
672  */
673 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
674                                 struct ctdb_req_control *c,
675                                 TDB_DATA indata, 
676                                 bool *async_reply)
677 {
678         TDB_DATA data;
679         
680         data.dsize = sizeof(struct ctdb_public_ip);
681         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
682         CTDB_NO_MEMORY(ctdb, data.dptr);
683         
684         memcpy(data.dptr, indata.dptr, indata.dsize);
685         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
686 }
687
688 /*
689   kill any clients that are registered with a IP that is being released
690  */
691 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
692 {
693         struct ctdb_client_ip *ip;
694
695         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
696                 ctdb_addr_to_str(addr)));
697
698         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
699                 ctdb_sock_addr tmp_addr;
700
701                 tmp_addr = ip->addr;
702                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
703                         ip->client_id,
704                         ctdb_addr_to_str(&ip->addr)));
705
706                 if (ctdb_same_ip(&tmp_addr, addr)) {
707                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
708                                                                      ip->client_id, 
709                                                                      struct ctdb_client);
710                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
711                                 ip->client_id,
712                                 ctdb_addr_to_str(&ip->addr),
713                                 client->pid));
714
715                         if (client->pid != 0) {
716                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
717                                         (unsigned)client->pid,
718                                         ctdb_addr_to_str(addr),
719                                         ip->client_id));
720                                 kill(client->pid, SIGKILL);
721                         }
722                 }
723         }
724 }
725
726 /*
727   called when releaseip event finishes
728  */
729 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
730                                 void *private_data)
731 {
732         struct takeover_callback_state *state = 
733                 talloc_get_type(private_data, struct takeover_callback_state);
734         TDB_DATA data;
735
736         if (status == -ETIME) {
737                 ctdb_ban_self(ctdb);
738         }
739
740         /* send a message to all clients of this node telling them
741            that the cluster has been reconfigured and they should
742            release any sockets on this IP */
743         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
744         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
745         data.dsize = strlen((char *)data.dptr)+1;
746
747         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
748
749         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
750
751         /* kill clients that have registered with this IP */
752         release_kill_clients(ctdb, state->addr);
753
754         ctdb_vnn_unassign_iface(ctdb, state->vnn);
755
756         /* the control succeeded */
757         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
758         talloc_free(state);
759 }
760
761 /*
762   release an ip address
763  */
764 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
765                                 struct ctdb_req_control *c,
766                                 TDB_DATA indata, 
767                                 bool *async_reply)
768 {
769         int ret;
770         struct takeover_callback_state *state;
771         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
772         struct ctdb_vnn *vnn;
773
774         /* update our vnn list */
775         vnn = find_public_ip_vnn(ctdb, &pip->addr);
776         if (vnn == NULL) {
777                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
778                         ctdb_addr_to_str(&pip->addr)));
779                 return 0;
780         }
781         vnn->pnn = pip->pnn;
782
783         /* stop any previous arps */
784         talloc_free(vnn->takeover_ctx);
785         vnn->takeover_ctx = NULL;
786
787         if (!ctdb_sys_have_ip(&pip->addr)) {
788                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
789                         ctdb_addr_to_str(&pip->addr),
790                         vnn->public_netmask_bits, 
791                         ctdb_vnn_iface_string(vnn)));
792                 ctdb_vnn_unassign_iface(ctdb, vnn);
793                 return 0;
794         }
795
796         if (vnn->iface == NULL) {
797                 DEBUG(DEBUG_CRIT,(__location__ " release_ip of IP %s is known to the kernel, "
798                                   "but we have no interface assigned, has someone manually configured it?"
799                                   "banning ourself\n",
800                                  ctdb_addr_to_str(&vnn->public_address)));
801                 ctdb_ban_self(ctdb);
802                 return -1;
803         }
804
805         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
806                 ctdb_addr_to_str(&pip->addr),
807                 vnn->public_netmask_bits, 
808                 ctdb_vnn_iface_string(vnn),
809                 pip->pnn));
810
811         state = talloc(ctdb, struct takeover_callback_state);
812         CTDB_NO_MEMORY(ctdb, state);
813
814         state->c = talloc_steal(state, c);
815         state->addr = talloc(state, ctdb_sock_addr);       
816         CTDB_NO_MEMORY(ctdb, state->addr);
817         *state->addr = pip->addr;
818         state->vnn   = vnn;
819
820         ret = ctdb_event_script_callback(ctdb, 
821                                          state, release_ip_callback, state,
822                                          false,
823                                          CTDB_EVENT_RELEASE_IP,
824                                          "%s %s %u",
825                                          ctdb_vnn_iface_string(vnn),
826                                          ctdb_addr_to_str(&pip->addr),
827                                          vnn->public_netmask_bits);
828         if (ret != 0) {
829                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
830                         ctdb_addr_to_str(&pip->addr),
831                         ctdb_vnn_iface_string(vnn)));
832                 talloc_free(state);
833                 return -1;
834         }
835
836         /* tell the control that we will be reply asynchronously */
837         *async_reply = true;
838         return 0;
839 }
840
841 /*
842   release an ip address old v4 style
843  */
844 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
845                                 struct ctdb_req_control *c,
846                                 TDB_DATA indata, 
847                                 bool *async_reply)
848 {
849         TDB_DATA data;
850         
851         data.dsize = sizeof(struct ctdb_public_ip);
852         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
853         CTDB_NO_MEMORY(ctdb, data.dptr);
854         
855         memcpy(data.dptr, indata.dptr, indata.dsize);
856         return ctdb_control_release_ip(ctdb, c, data, async_reply);
857 }
858
859
860 static int ctdb_add_public_address(struct ctdb_context *ctdb,
861                                    ctdb_sock_addr *addr,
862                                    unsigned mask, const char *ifaces)
863 {
864         struct ctdb_vnn      *vnn;
865         uint32_t num = 0;
866         char *tmp;
867         const char *iface;
868         int i;
869         int ret;
870
871         /* Verify that we dont have an entry for this ip yet */
872         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
873                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
874                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
875                                 ctdb_addr_to_str(addr)));
876                         return -1;
877                 }               
878         }
879
880         /* create a new vnn structure for this ip address */
881         vnn = talloc_zero(ctdb, struct ctdb_vnn);
882         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
883         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
884         tmp = talloc_strdup(vnn, ifaces);
885         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
886         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
887                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
888                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
889                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
890                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
891                 num++;
892         }
893         talloc_free(tmp);
894         vnn->ifaces[num] = NULL;
895         vnn->public_address      = *addr;
896         vnn->public_netmask_bits = mask;
897         vnn->pnn                 = -1;
898         if (ctdb_sys_have_ip(addr)) {
899                 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
900                 vnn->pnn = ctdb->pnn;
901         }
902
903         for (i=0; vnn->ifaces[i]; i++) {
904                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
905                 if (ret != 0) {
906                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
907                                            "for public_address[%s]\n",
908                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
909                         talloc_free(vnn);
910                         return -1;
911                 }
912                 if (i == 0) {
913                         vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
914                 }
915         }
916
917         DLIST_ADD(ctdb->vnn, vnn);
918
919         return 0;
920 }
921
922 /*
923   setup the event script directory
924 */
925 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
926 {
927         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
928         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
929         return 0;
930 }
931
932 /*
933   setup the public address lists from a file
934 */
935 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
936 {
937         char **lines;
938         int nlines;
939         int i;
940
941         lines = file_lines_load(alist, &nlines, ctdb);
942         if (lines == NULL) {
943                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
944                 return -1;
945         }
946         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
947                 nlines--;
948         }
949
950         for (i=0;i<nlines;i++) {
951                 unsigned mask;
952                 ctdb_sock_addr addr;
953                 const char *addrstr;
954                 const char *ifaces;
955                 char *tok, *line;
956
957                 line = lines[i];
958                 while ((*line == ' ') || (*line == '\t')) {
959                         line++;
960                 }
961                 if (*line == '#') {
962                         continue;
963                 }
964                 if (strcmp(line, "") == 0) {
965                         continue;
966                 }
967                 tok = strtok(line, " \t");
968                 addrstr = tok;
969                 tok = strtok(NULL, " \t");
970                 if (tok == NULL) {
971                         if (NULL == ctdb->default_public_interface) {
972                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
973                                          i+1));
974                                 talloc_free(lines);
975                                 return -1;
976                         }
977                         ifaces = ctdb->default_public_interface;
978                 } else {
979                         ifaces = tok;
980                 }
981
982                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
983                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
984                         talloc_free(lines);
985                         return -1;
986                 }
987                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
988                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
989                         talloc_free(lines);
990                         return -1;
991                 }
992         }
993
994         talloc_free(lines);
995         return 0;
996 }
997
998 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
999                               const char *iface,
1000                               const char *ip)
1001 {
1002         struct ctdb_vnn *svnn;
1003         bool ok;
1004         int ret;
1005
1006         svnn = talloc_zero(ctdb, struct ctdb_vnn);
1007         CTDB_NO_MEMORY(ctdb, svnn);
1008
1009         svnn->ifaces = talloc_array(svnn, const char *, 2);
1010         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1011         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1012         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1013         svnn->ifaces[1] = NULL;
1014
1015         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1016         if (!ok) {
1017                 talloc_free(svnn);
1018                 return -1;
1019         }
1020
1021         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1022         if (ret != 0) {
1023                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1024                                    "for single_ip[%s]\n",
1025                                    svnn->ifaces[0],
1026                                    ctdb_addr_to_str(&svnn->public_address)));
1027                 talloc_free(svnn);
1028                 return -1;
1029         }
1030
1031         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1032         if (ret != 0) {
1033                 talloc_free(svnn);
1034                 return -1;
1035         }
1036
1037         ctdb->single_ip_vnn = svnn;
1038         return 0;
1039 }
1040
1041 struct ctdb_public_ip_list {
1042         struct ctdb_public_ip_list *next;
1043         uint32_t pnn;
1044         ctdb_sock_addr addr;
1045 };
1046
1047
1048 /* Given a physical node, return the number of
1049    public addresses that is currently assigned to this node.
1050 */
1051 static int node_ip_coverage(struct ctdb_context *ctdb, 
1052         int32_t pnn,
1053         struct ctdb_public_ip_list *ips)
1054 {
1055         int num=0;
1056
1057         for (;ips;ips=ips->next) {
1058                 if (ips->pnn == pnn) {
1059                         num++;
1060                 }
1061         }
1062         return num;
1063 }
1064
1065
1066 /* Check if this is a public ip known to the node, i.e. can that
1067    node takeover this ip ?
1068 */
1069 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1070                 struct ctdb_public_ip_list *ip)
1071 {
1072         struct ctdb_all_public_ips *public_ips;
1073         int i;
1074
1075         public_ips = ctdb->nodes[pnn]->available_public_ips;
1076
1077         if (public_ips == NULL) {
1078                 return -1;
1079         }
1080
1081         for (i=0;i<public_ips->num;i++) {
1082                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1083                         /* yes, this node can serve this public ip */
1084                         return 0;
1085                 }
1086         }
1087
1088         return -1;
1089 }
1090
1091
1092 /* search the node lists list for a node to takeover this ip.
1093    pick the node that currently are serving the least number of ips
1094    so that the ips get spread out evenly.
1095 */
1096 static int find_takeover_node(struct ctdb_context *ctdb, 
1097                 struct ctdb_node_map *nodemap, uint32_t mask, 
1098                 struct ctdb_public_ip_list *ip,
1099                 struct ctdb_public_ip_list *all_ips)
1100 {
1101         int pnn, min=0, num;
1102         int i;
1103
1104         pnn    = -1;
1105         for (i=0;i<nodemap->num;i++) {
1106                 if (nodemap->nodes[i].flags & mask) {
1107                         /* This node is not healty and can not be used to serve
1108                            a public address 
1109                         */
1110                         continue;
1111                 }
1112
1113                 /* verify that this node can serve this ip */
1114                 if (can_node_serve_ip(ctdb, i, ip)) {
1115                         /* no it couldnt   so skip to the next node */
1116                         continue;
1117                 }
1118
1119                 num = node_ip_coverage(ctdb, i, all_ips);
1120                 /* was this the first node we checked ? */
1121                 if (pnn == -1) {
1122                         pnn = i;
1123                         min  = num;
1124                 } else {
1125                         if (num < min) {
1126                                 pnn = i;
1127                                 min  = num;
1128                         }
1129                 }
1130         }       
1131         if (pnn == -1) {
1132                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1133                         ctdb_addr_to_str(&ip->addr)));
1134
1135                 return -1;
1136         }
1137
1138         ip->pnn = pnn;
1139         return 0;
1140 }
1141
1142 #define IP_KEYLEN       4
1143 static uint32_t *ip_key(ctdb_sock_addr *ip)
1144 {
1145         static uint32_t key[IP_KEYLEN];
1146
1147         bzero(key, sizeof(key));
1148
1149         switch (ip->sa.sa_family) {
1150         case AF_INET:
1151                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1152                 break;
1153         case AF_INET6:
1154                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1155                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1156                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1157                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1158                 break;
1159         default:
1160                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1161                 return key;
1162         }
1163
1164         return key;
1165 }
1166
1167 static void *add_ip_callback(void *parm, void *data)
1168 {
1169         struct ctdb_public_ip_list *this_ip = parm; 
1170         struct ctdb_public_ip_list *prev_ip = data; 
1171
1172         if (prev_ip == NULL) {
1173                 return parm;
1174         }
1175         if (this_ip->pnn == -1) {
1176                 this_ip->pnn = prev_ip->pnn;
1177         }
1178
1179         return parm;
1180 }
1181
1182 void getips_count_callback(void *param, void *data)
1183 {
1184         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1185         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1186
1187         new_ip->next = *ip_list;
1188         *ip_list     = new_ip;
1189 }
1190
1191 static struct ctdb_public_ip_list *
1192 create_merged_ip_list(struct ctdb_context *ctdb)
1193 {
1194         int i, j;
1195         struct ctdb_public_ip_list *ip_list;
1196         struct ctdb_all_public_ips *public_ips;
1197
1198         if (ctdb->ip_tree != NULL) {
1199                 talloc_free(ctdb->ip_tree);
1200                 ctdb->ip_tree = NULL;
1201         }
1202         ctdb->ip_tree = trbt_create(ctdb, 0);
1203
1204         for (i=0;i<ctdb->num_nodes;i++) {
1205                 public_ips = ctdb->nodes[i]->known_public_ips;
1206
1207                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1208                         continue;
1209                 }
1210
1211                 /* there were no public ips for this node */
1212                 if (public_ips == NULL) {
1213                         continue;
1214                 }               
1215
1216                 for (j=0;j<public_ips->num;j++) {
1217                         struct ctdb_public_ip_list *tmp_ip; 
1218
1219                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1220                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1221                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1222                         tmp_ip->addr = public_ips->ips[j].addr;
1223                         tmp_ip->next = NULL;
1224
1225                         trbt_insertarray32_callback(ctdb->ip_tree,
1226                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1227                                 add_ip_callback,
1228                                 tmp_ip);
1229                 }
1230         }
1231
1232         ip_list = NULL;
1233         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1234
1235         return ip_list;
1236 }
1237
1238 /*
1239   make any IP alias changes for public addresses that are necessary 
1240  */
1241 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1242 {
1243         int i, num_healthy, retries;
1244         struct ctdb_public_ip ip;
1245         struct ctdb_public_ipv4 ipv4;
1246         uint32_t mask, *nodes;
1247         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1248         int maxnode, maxnum=0, minnode, minnum=0, num;
1249         TDB_DATA data;
1250         struct timeval timeout;
1251         struct client_async_data *async_data;
1252         struct ctdb_client_control_state *state;
1253         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1254
1255         /*
1256          * ip failover is completely disabled, just send out the 
1257          * ipreallocated event.
1258          */
1259         if (ctdb->tunable.disable_ip_failover != 0) {
1260                 goto ipreallocated;
1261         }
1262
1263         ZERO_STRUCT(ip);
1264
1265         /* Count how many completely healthy nodes we have */
1266         num_healthy = 0;
1267         for (i=0;i<nodemap->num;i++) {
1268                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1269                         num_healthy++;
1270                 }
1271         }
1272
1273         if (num_healthy > 0) {
1274                 /* We have healthy nodes, so only consider them for 
1275                    serving public addresses
1276                 */
1277                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1278         } else {
1279                 /* We didnt have any completely healthy nodes so
1280                    use "disabled" nodes as a fallback
1281                 */
1282                 mask = NODE_FLAGS_INACTIVE;
1283         }
1284
1285         /* since nodes only know about those public addresses that
1286            can be served by that particular node, no single node has
1287            a full list of all public addresses that exist in the cluster.
1288            Walk over all node structures and create a merged list of
1289            all public addresses that exist in the cluster.
1290
1291            keep the tree of ips around as ctdb->ip_tree
1292         */
1293         all_ips = create_merged_ip_list(ctdb);
1294
1295         /* If we want deterministic ip allocations, i.e. that the ip addresses
1296            will always be allocated the same way for a specific set of
1297            available/unavailable nodes.
1298         */
1299         if (1 == ctdb->tunable.deterministic_public_ips) {              
1300                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1301                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1302                         tmp_ip->pnn = i%nodemap->num;
1303                 }
1304         }
1305
1306
1307         /* mark all public addresses with a masked node as being served by
1308            node -1
1309         */
1310         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1311                 if (tmp_ip->pnn == -1) {
1312                         continue;
1313                 }
1314                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1315                         tmp_ip->pnn = -1;
1316                 }
1317         }
1318
1319         /* verify that the assigned nodes can serve that public ip
1320            and set it to -1 if not
1321         */
1322         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1323                 if (tmp_ip->pnn == -1) {
1324                         continue;
1325                 }
1326                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1327                         /* this node can not serve this ip. */
1328                         tmp_ip->pnn = -1;
1329                 }
1330         }
1331
1332
1333         /* now we must redistribute all public addresses with takeover node
1334            -1 among the nodes available
1335         */
1336         retries = 0;
1337 try_again:
1338         /* loop over all ip's and find a physical node to cover for 
1339            each unassigned ip.
1340         */
1341         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1342                 if (tmp_ip->pnn == -1) {
1343                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1344                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1345                                         ctdb_addr_to_str(&tmp_ip->addr)));
1346                         }
1347                 }
1348         }
1349
1350         /* If we dont want ips to fail back after a node becomes healthy
1351            again, we wont even try to reallocat the ip addresses so that
1352            they are evenly spread out.
1353            This can NOT be used at the same time as DeterministicIPs !
1354         */
1355         if (1 == ctdb->tunable.no_ip_failback) {
1356                 if (1 == ctdb->tunable.deterministic_public_ips) {
1357                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1358                 }
1359                 goto finished;
1360         }
1361
1362
1363         /* now, try to make sure the ip adresses are evenly distributed
1364            across the node.
1365            for each ip address, loop over all nodes that can serve this
1366            ip and make sure that the difference between the node
1367            serving the most and the node serving the least ip's are not greater
1368            than 1.
1369         */
1370         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1371                 if (tmp_ip->pnn == -1) {
1372                         continue;
1373                 }
1374
1375                 /* Get the highest and lowest number of ips's served by any 
1376                    valid node which can serve this ip.
1377                 */
1378                 maxnode = -1;
1379                 minnode = -1;
1380                 for (i=0;i<nodemap->num;i++) {
1381                         if (nodemap->nodes[i].flags & mask) {
1382                                 continue;
1383                         }
1384
1385                         /* only check nodes that can actually serve this ip */
1386                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1387                                 /* no it couldnt   so skip to the next node */
1388                                 continue;
1389                         }
1390
1391                         num = node_ip_coverage(ctdb, i, all_ips);
1392                         if (maxnode == -1) {
1393                                 maxnode = i;
1394                                 maxnum  = num;
1395                         } else {
1396                                 if (num > maxnum) {
1397                                         maxnode = i;
1398                                         maxnum  = num;
1399                                 }
1400                         }
1401                         if (minnode == -1) {
1402                                 minnode = i;
1403                                 minnum  = num;
1404                         } else {
1405                                 if (num < minnum) {
1406                                         minnode = i;
1407                                         minnum  = num;
1408                                 }
1409                         }
1410                 }
1411                 if (maxnode == -1) {
1412                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1413                                 ctdb_addr_to_str(&tmp_ip->addr)));
1414
1415                         continue;
1416                 }
1417
1418                 /* If we want deterministic IPs then dont try to reallocate 
1419                    them to spread out the load.
1420                 */
1421                 if (1 == ctdb->tunable.deterministic_public_ips) {
1422                         continue;
1423                 }
1424
1425                 /* if the spread between the smallest and largest coverage by
1426                    a node is >=2 we steal one of the ips from the node with
1427                    most coverage to even things out a bit.
1428                    try to do this at most 5 times  since we dont want to spend
1429                    too much time balancing the ip coverage.
1430                 */
1431                 if ( (maxnum > minnum+1)
1432                   && (retries < 5) ){
1433                         struct ctdb_public_ip_list *tmp;
1434
1435                         /* mark one of maxnode's vnn's as unassigned and try
1436                            again
1437                         */
1438                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1439                                 if (tmp->pnn == maxnode) {
1440                                         tmp->pnn = -1;
1441                                         retries++;
1442                                         goto try_again;
1443                                 }
1444                         }
1445                 }
1446         }
1447
1448
1449         /* finished distributing the public addresses, now just send the 
1450            info out to the nodes
1451         */
1452 finished:
1453
1454         /* at this point ->pnn is the node which will own each IP
1455            or -1 if there is no node that can cover this ip
1456         */
1457
1458         /* now tell all nodes to delete any alias that they should not
1459            have.  This will be a NOOP on nodes that don't currently
1460            hold the given alias */
1461         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1462         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1463
1464         for (i=0;i<nodemap->num;i++) {
1465                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1466                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1467                         continue;
1468                 }
1469
1470                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1471                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1472                                 /* This node should be serving this
1473                                    vnn so dont tell it to release the ip
1474                                 */
1475                                 continue;
1476                         }
1477                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1478                                 ipv4.pnn = tmp_ip->pnn;
1479                                 ipv4.sin = tmp_ip->addr.ip;
1480
1481                                 timeout = TAKEOVER_TIMEOUT();
1482                                 data.dsize = sizeof(ipv4);
1483                                 data.dptr  = (uint8_t *)&ipv4;
1484                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1485                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1486                                                 data, async_data,
1487                                                 &timeout, NULL);
1488                         } else {
1489                                 ip.pnn  = tmp_ip->pnn;
1490                                 ip.addr = tmp_ip->addr;
1491
1492                                 timeout = TAKEOVER_TIMEOUT();
1493                                 data.dsize = sizeof(ip);
1494                                 data.dptr  = (uint8_t *)&ip;
1495                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1496                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1497                                                 data, async_data,
1498                                                 &timeout, NULL);
1499                         }
1500
1501                         if (state == NULL) {
1502                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1503                                 talloc_free(tmp_ctx);
1504                                 return -1;
1505                         }
1506                 
1507                         ctdb_client_async_add(async_data, state);
1508                 }
1509         }
1510         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1511                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1512                 talloc_free(tmp_ctx);
1513                 return -1;
1514         }
1515         talloc_free(async_data);
1516
1517
1518         /* tell all nodes to get their own IPs */
1519         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1520         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1521         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1522                 if (tmp_ip->pnn == -1) {
1523                         /* this IP won't be taken over */
1524                         continue;
1525                 }
1526
1527                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1528                         ipv4.pnn = tmp_ip->pnn;
1529                         ipv4.sin = tmp_ip->addr.ip;
1530
1531                         timeout = TAKEOVER_TIMEOUT();
1532                         data.dsize = sizeof(ipv4);
1533                         data.dptr  = (uint8_t *)&ipv4;
1534                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1535                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1536                                         data, async_data,
1537                                         &timeout, NULL);
1538                 } else {
1539                         ip.pnn  = tmp_ip->pnn;
1540                         ip.addr = tmp_ip->addr;
1541
1542                         timeout = TAKEOVER_TIMEOUT();
1543                         data.dsize = sizeof(ip);
1544                         data.dptr  = (uint8_t *)&ip;
1545                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1546                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1547                                         data, async_data,
1548                                         &timeout, NULL);
1549                 }
1550                 if (state == NULL) {
1551                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1552                         talloc_free(tmp_ctx);
1553                         return -1;
1554                 }
1555                 
1556                 ctdb_client_async_add(async_data, state);
1557         }
1558         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1559                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1560                 talloc_free(tmp_ctx);
1561                 return -1;
1562         }
1563
1564 ipreallocated:
1565         /* tell all nodes to update natwg */
1566         /* send the flags update natgw on all connected nodes */
1567         data.dptr  = discard_const("ipreallocated");
1568         data.dsize = strlen((char *)data.dptr) + 1; 
1569         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1570         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1571                                       nodes, 0, TAKEOVER_TIMEOUT(),
1572                                       false, data,
1573                                       NULL, NULL,
1574                                       NULL) != 0) {
1575                 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1576         }
1577
1578         talloc_free(tmp_ctx);
1579         return 0;
1580 }
1581
1582
1583 /*
1584   destroy a ctdb_client_ip structure
1585  */
1586 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1587 {
1588         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1589                 ctdb_addr_to_str(&ip->addr),
1590                 ntohs(ip->addr.ip.sin_port),
1591                 ip->client_id));
1592
1593         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1594         return 0;
1595 }
1596
1597 /*
1598   called by a client to inform us of a TCP connection that it is managing
1599   that should tickled with an ACK when IP takeover is done
1600   we handle both the old ipv4 style of packets as well as the new ipv4/6
1601   pdus.
1602  */
1603 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1604                                 TDB_DATA indata)
1605 {
1606         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1607         struct ctdb_control_tcp *old_addr = NULL;
1608         struct ctdb_control_tcp_addr new_addr;
1609         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1610         struct ctdb_tcp_list *tcp;
1611         struct ctdb_tcp_connection t;
1612         int ret;
1613         TDB_DATA data;
1614         struct ctdb_client_ip *ip;
1615         struct ctdb_vnn *vnn;
1616         ctdb_sock_addr addr;
1617
1618         switch (indata.dsize) {
1619         case sizeof(struct ctdb_control_tcp):
1620                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1621                 ZERO_STRUCT(new_addr);
1622                 tcp_sock = &new_addr;
1623                 tcp_sock->src.ip  = old_addr->src;
1624                 tcp_sock->dest.ip = old_addr->dest;
1625                 break;
1626         case sizeof(struct ctdb_control_tcp_addr):
1627                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1628                 break;
1629         default:
1630                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1631                                  "to ctdb_control_tcp_client. size was %d but "
1632                                  "only allowed sizes are %lu and %lu\n",
1633                                  (int)indata.dsize,
1634                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1635                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1636                 return -1;
1637         }
1638
1639         addr = tcp_sock->src;
1640         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1641         addr = tcp_sock->dest;
1642         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1643
1644         ZERO_STRUCT(addr);
1645         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1646         vnn = find_public_ip_vnn(ctdb, &addr);
1647         if (vnn == NULL) {
1648                 switch (addr.sa.sa_family) {
1649                 case AF_INET:
1650                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1651                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1652                                         ctdb_addr_to_str(&addr)));
1653                         }
1654                         break;
1655                 case AF_INET6:
1656                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1657                                 ctdb_addr_to_str(&addr)));
1658                         break;
1659                 default:
1660                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1661                 }
1662
1663                 return 0;
1664         }
1665
1666         if (vnn->pnn != ctdb->pnn) {
1667                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1668                         ctdb_addr_to_str(&addr),
1669                         client_id, client->pid));
1670                 /* failing this call will tell smbd to die */
1671                 return -1;
1672         }
1673
1674         ip = talloc(client, struct ctdb_client_ip);
1675         CTDB_NO_MEMORY(ctdb, ip);
1676
1677         ip->ctdb      = ctdb;
1678         ip->addr      = addr;
1679         ip->client_id = client_id;
1680         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1681         DLIST_ADD(ctdb->client_ip_list, ip);
1682
1683         tcp = talloc(client, struct ctdb_tcp_list);
1684         CTDB_NO_MEMORY(ctdb, tcp);
1685
1686         tcp->connection.src_addr = tcp_sock->src;
1687         tcp->connection.dst_addr = tcp_sock->dest;
1688
1689         DLIST_ADD(client->tcp_list, tcp);
1690
1691         t.src_addr = tcp_sock->src;
1692         t.dst_addr = tcp_sock->dest;
1693
1694         data.dptr = (uint8_t *)&t;
1695         data.dsize = sizeof(t);
1696
1697         switch (addr.sa.sa_family) {
1698         case AF_INET:
1699                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1700                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1701                         ctdb_addr_to_str(&tcp_sock->src),
1702                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1703                 break;
1704         case AF_INET6:
1705                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1706                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1707                         ctdb_addr_to_str(&tcp_sock->src),
1708                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1709                 break;
1710         default:
1711                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1712         }
1713
1714
1715         /* tell all nodes about this tcp connection */
1716         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1717                                        CTDB_CONTROL_TCP_ADD,
1718                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1719         if (ret != 0) {
1720                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1721                 return -1;
1722         }
1723
1724         return 0;
1725 }
1726
1727 /*
1728   find a tcp address on a list
1729  */
1730 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1731                                            struct ctdb_tcp_connection *tcp)
1732 {
1733         int i;
1734
1735         if (array == NULL) {
1736                 return NULL;
1737         }
1738
1739         for (i=0;i<array->num;i++) {
1740                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1741                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1742                         return &array->connections[i];
1743                 }
1744         }
1745         return NULL;
1746 }
1747
1748
1749
1750 /*
1751   called by a daemon to inform us of a TCP connection that one of its
1752   clients managing that should tickled with an ACK when IP takeover is
1753   done
1754  */
1755 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1756 {
1757         struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1758         struct ctdb_tcp_array *tcparray;
1759         struct ctdb_tcp_connection tcp;
1760         struct ctdb_vnn *vnn;
1761
1762         vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1763         if (vnn == NULL) {
1764                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1765                         ctdb_addr_to_str(&p->dst_addr)));
1766
1767                 return -1;
1768         }
1769
1770
1771         tcparray = vnn->tcp_array;
1772
1773         /* If this is the first tickle */
1774         if (tcparray == NULL) {
1775                 tcparray = talloc_size(ctdb->nodes, 
1776                         offsetof(struct ctdb_tcp_array, connections) +
1777                         sizeof(struct ctdb_tcp_connection) * 1);
1778                 CTDB_NO_MEMORY(ctdb, tcparray);
1779                 vnn->tcp_array = tcparray;
1780
1781                 tcparray->num = 0;
1782                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1783                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1784
1785                 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1786                 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1787                 tcparray->num++;
1788
1789                 if (tcp_update_needed) {
1790                         vnn->tcp_update_needed = true;
1791                 }
1792                 return 0;
1793         }
1794
1795
1796         /* Do we already have this tickle ?*/
1797         tcp.src_addr = p->src_addr;
1798         tcp.dst_addr = p->dst_addr;
1799         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1800                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1801                         ctdb_addr_to_str(&tcp.dst_addr),
1802                         ntohs(tcp.dst_addr.ip.sin_port),
1803                         vnn->pnn));
1804                 return 0;
1805         }
1806
1807         /* A new tickle, we must add it to the array */
1808         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1809                                         struct ctdb_tcp_connection,
1810                                         tcparray->num+1);
1811         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1812
1813         vnn->tcp_array = tcparray;
1814         tcparray->connections[tcparray->num].src_addr = p->src_addr;
1815         tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1816         tcparray->num++;
1817                                 
1818         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1819                 ctdb_addr_to_str(&tcp.dst_addr),
1820                 ntohs(tcp.dst_addr.ip.sin_port),
1821                 vnn->pnn));
1822
1823         if (tcp_update_needed) {
1824                 vnn->tcp_update_needed = true;
1825         }
1826
1827         return 0;
1828 }
1829
1830
1831 /*
1832   called by a daemon to inform us of a TCP connection that one of its
1833   clients managing that should tickled with an ACK when IP takeover is
1834   done
1835  */
1836 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1837 {
1838         struct ctdb_tcp_connection *tcpp;
1839         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1840
1841         if (vnn == NULL) {
1842                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1843                         ctdb_addr_to_str(&conn->dst_addr)));
1844                 return;
1845         }
1846
1847         /* if the array is empty we cant remove it
1848            and we dont need to do anything
1849          */
1850         if (vnn->tcp_array == NULL) {
1851                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1852                         ctdb_addr_to_str(&conn->dst_addr),
1853                         ntohs(conn->dst_addr.ip.sin_port)));
1854                 return;
1855         }
1856
1857
1858         /* See if we know this connection
1859            if we dont know this connection  then we dont need to do anything
1860          */
1861         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1862         if (tcpp == NULL) {
1863                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1864                         ctdb_addr_to_str(&conn->dst_addr),
1865                         ntohs(conn->dst_addr.ip.sin_port)));
1866                 return;
1867         }
1868
1869
1870         /* We need to remove this entry from the array.
1871            Instead of allocating a new array and copying data to it
1872            we cheat and just copy the last entry in the existing array
1873            to the entry that is to be removed and just shring the 
1874            ->num field
1875          */
1876         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1877         vnn->tcp_array->num--;
1878
1879         /* If we deleted the last entry we also need to remove the entire array
1880          */
1881         if (vnn->tcp_array->num == 0) {
1882                 talloc_free(vnn->tcp_array);
1883                 vnn->tcp_array = NULL;
1884         }               
1885
1886         vnn->tcp_update_needed = true;
1887
1888         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1889                 ctdb_addr_to_str(&conn->src_addr),
1890                 ntohs(conn->src_addr.ip.sin_port)));
1891 }
1892
1893
1894 /*
1895   called by a daemon to inform us of a TCP connection that one of its
1896   clients used are no longer needed in the tickle database
1897  */
1898 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1899 {
1900         struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1901
1902         ctdb_remove_tcp_connection(ctdb, conn);
1903
1904         return 0;
1905 }
1906
1907
/*
  called when a daemon restarts - the intent is to send all tickles for
  all public addresses we are serving to the new node immediately.
  Currently a stub: neither argument is used and 0 is always returned.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
	(void)ctdb;
	(void)vnn;

	/* XXX here we should send all tickles we are serving to the new node */
	return 0;
}
1917
1918
1919 /*
1920   called when a client structure goes away - hook to remove
1921   elements from the tcp_list in all daemons
1922  */
1923 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1924 {
1925         while (client->tcp_list) {
1926                 struct ctdb_tcp_list *tcp = client->tcp_list;
1927                 DLIST_REMOVE(client->tcp_list, tcp);
1928                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1929         }
1930 }
1931
1932
1933 /*
1934   release all IPs on shutdown
1935  */
1936 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1937 {
1938         struct ctdb_vnn *vnn;
1939
1940         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1941                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1942                         ctdb_vnn_unassign_iface(ctdb, vnn);
1943                         continue;
1944                 }
1945                 if (!vnn->iface) {
1946                         continue;
1947                 }
1948                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1949                                   ctdb_vnn_iface_string(vnn),
1950                                   ctdb_addr_to_str(&vnn->public_address),
1951                                   vnn->public_netmask_bits);
1952                 release_kill_clients(ctdb, &vnn->public_address);
1953                 ctdb_vnn_unassign_iface(ctdb, vnn);
1954         }
1955 }
1956
1957
1958 /*
1959   get list of public IPs
1960  */
1961 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1962                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1963 {
1964         int i, num, len;
1965         struct ctdb_all_public_ips *ips;
1966         struct ctdb_vnn *vnn;
1967         bool only_available = false;
1968
1969         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1970                 only_available = true;
1971         }
1972
1973         /* count how many public ip structures we have */
1974         num = 0;
1975         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1976                 num++;
1977         }
1978
1979         len = offsetof(struct ctdb_all_public_ips, ips) + 
1980                 num*sizeof(struct ctdb_public_ip);
1981         ips = talloc_zero_size(outdata, len);
1982         CTDB_NO_MEMORY(ctdb, ips);
1983
1984         i = 0;
1985         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1986                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
1987                         continue;
1988                 }
1989                 ips->ips[i].pnn  = vnn->pnn;
1990                 ips->ips[i].addr = vnn->public_address;
1991                 i++;
1992         }
1993         ips->num = i;
1994         len = offsetof(struct ctdb_all_public_ips, ips) +
1995                 i*sizeof(struct ctdb_public_ip);
1996
1997         outdata->dsize = len;
1998         outdata->dptr  = (uint8_t *)ips;
1999
2000         return 0;
2001 }
2002
2003
2004 /*
2005   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
2006  */
2007 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
2008                                     struct ctdb_req_control *c, TDB_DATA *outdata)
2009 {
2010         int i, num, len;
2011         struct ctdb_all_public_ipsv4 *ips;
2012         struct ctdb_vnn *vnn;
2013
2014         /* count how many public ip structures we have */
2015         num = 0;
2016         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2017                 if (vnn->public_address.sa.sa_family != AF_INET) {
2018                         continue;
2019                 }
2020                 num++;
2021         }
2022
2023         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
2024                 num*sizeof(struct ctdb_public_ipv4);
2025         ips = talloc_zero_size(outdata, len);
2026         CTDB_NO_MEMORY(ctdb, ips);
2027
2028         outdata->dsize = len;
2029         outdata->dptr  = (uint8_t *)ips;
2030
2031         ips->num = num;
2032         i = 0;
2033         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2034                 if (vnn->public_address.sa.sa_family != AF_INET) {
2035                         continue;
2036                 }
2037                 ips->ips[i].pnn = vnn->pnn;
2038                 ips->ips[i].sin = vnn->public_address.ip;
2039                 i++;
2040         }
2041
2042         return 0;
2043 }
2044
2045 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2046                                         struct ctdb_req_control *c,
2047                                         TDB_DATA indata,
2048                                         TDB_DATA *outdata)
2049 {
2050         int i, num, len;
2051         ctdb_sock_addr *addr;
2052         struct ctdb_control_public_ip_info *info;
2053         struct ctdb_vnn *vnn;
2054
2055         addr = (ctdb_sock_addr *)indata.dptr;
2056
2057         vnn = find_public_ip_vnn(ctdb, addr);
2058         if (vnn == NULL) {
2059                 /* if it is not a public ip   it could be our 'single ip' */
2060                 if (ctdb->single_ip_vnn) {
2061                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2062                                 vnn = ctdb->single_ip_vnn;
2063                         }
2064                 }
2065         }
2066         if (vnn == NULL) {
2067                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2068                                  "'%s'not a public address\n",
2069                                  ctdb_addr_to_str(addr)));
2070                 return -1;
2071         }
2072
2073         /* count how many public ip structures we have */
2074         num = 0;
2075         for (;vnn->ifaces[num];) {
2076                 num++;
2077         }
2078
2079         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2080                 num*sizeof(struct ctdb_control_iface_info);
2081         info = talloc_zero_size(outdata, len);
2082         CTDB_NO_MEMORY(ctdb, info);
2083
2084         info->ip.addr = vnn->public_address;
2085         info->ip.pnn = vnn->pnn;
2086         info->active_idx = 0xFFFFFFFF;
2087
2088         for (i=0; vnn->ifaces[i]; i++) {
2089                 struct ctdb_iface *cur;
2090
2091                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2092                 if (cur == NULL) {
2093                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2094                                            vnn->ifaces[i]));
2095                         return -1;
2096                 }
2097                 if (vnn->iface == cur) {
2098                         info->active_idx = i;
2099                 }
2100                 strcpy(info->ifaces[i].name, cur->name);
2101                 info->ifaces[i].link_state = cur->link_up;
2102                 info->ifaces[i].references = cur->references;
2103         }
2104         info->num = i;
2105         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2106                 i*sizeof(struct ctdb_control_iface_info);
2107
2108         outdata->dsize = len;
2109         outdata->dptr  = (uint8_t *)info;
2110
2111         return 0;
2112 }
2113
2114 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2115                                 struct ctdb_req_control *c,
2116                                 TDB_DATA *outdata)
2117 {
2118         int i, num, len;
2119         struct ctdb_control_get_ifaces *ifaces;
2120         struct ctdb_iface *cur;
2121
2122         /* count how many public ip structures we have */
2123         num = 0;
2124         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2125                 num++;
2126         }
2127
2128         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2129                 num*sizeof(struct ctdb_control_iface_info);
2130         ifaces = talloc_zero_size(outdata, len);
2131         CTDB_NO_MEMORY(ctdb, ifaces);
2132
2133         i = 0;
2134         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2135                 strcpy(ifaces->ifaces[i].name, cur->name);
2136                 ifaces->ifaces[i].link_state = cur->link_up;
2137                 ifaces->ifaces[i].references = cur->references;
2138                 i++;
2139         }
2140         ifaces->num = i;
2141         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2142                 i*sizeof(struct ctdb_control_iface_info);
2143
2144         outdata->dsize = len;
2145         outdata->dptr  = (uint8_t *)ifaces;
2146
2147         return 0;
2148 }
2149
2150 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2151                                     struct ctdb_req_control *c,
2152                                     TDB_DATA indata)
2153 {
2154         struct ctdb_control_iface_info *info;
2155         struct ctdb_iface *iface;
2156         bool link_up = false;
2157
2158         info = (struct ctdb_control_iface_info *)indata.dptr;
2159
2160         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2161                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2162                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2163                                   len, len, info->name));
2164                 return -1;
2165         }
2166
2167         switch (info->link_state) {
2168         case 0:
2169                 link_up = false;
2170                 break;
2171         case 1:
2172                 link_up = true;
2173                 break;
2174         default:
2175                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2176                                   (unsigned int)info->link_state));
2177                 return -1;
2178         }
2179
2180         if (info->references != 0) {
2181                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2182                                   (unsigned int)info->references));
2183                 return -1;
2184         }
2185
2186         iface = ctdb_find_iface(ctdb, info->name);
2187         if (iface == NULL) {
2188                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2189                                   info->name));
2190                 return -1;
2191         }
2192
2193         if (link_up == iface->link_up) {
2194                 return 0;
2195         }
2196
2197         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2198               ("iface[%s] has changed it's link status %s => %s\n",
2199                iface->name,
2200                iface->link_up?"up":"down",
2201                link_up?"up":"down"));
2202
2203         iface->link_up = link_up;
2204         return 0;
2205 }
2206
2207
2208 /* 
2209    structure containing the listening socket and the list of tcp connections
2210    that the ctdb daemon is to kill
2211 */
2212 struct ctdb_kill_tcp {
2213         struct ctdb_vnn *vnn;
2214         struct ctdb_context *ctdb;
2215         int capture_fd;
2216         struct fd_event *fde;
2217         trbt_tree_t *connections;
2218         void *private_data;
2219 };
2220
2221 /*
2222   a tcp connection that is to be killed
2223  */
2224 struct ctdb_killtcp_con {
2225         ctdb_sock_addr src_addr;
2226         ctdb_sock_addr dst_addr;
2227         int count;
2228         struct ctdb_kill_tcp *killtcp;
2229 };
2230
2231 /* this function is used to create a key to represent this socketpair
2232    in the killtcp tree.
2233    this key is used to insert and lookup matching socketpairs that are
2234    to be tickled and RST
2235 */
2236 #define KILLTCP_KEYLEN  10
2237 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2238 {
2239         static uint32_t key[KILLTCP_KEYLEN];
2240
2241         bzero(key, sizeof(key));
2242
2243         if (src->sa.sa_family != dst->sa.sa_family) {
2244                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2245                 return key;
2246         }
2247         
2248         switch (src->sa.sa_family) {
2249         case AF_INET:
2250                 key[0]  = dst->ip.sin_addr.s_addr;
2251                 key[1]  = src->ip.sin_addr.s_addr;
2252                 key[2]  = dst->ip.sin_port;
2253                 key[3]  = src->ip.sin_port;
2254                 break;
2255         case AF_INET6:
2256                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2257                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2258                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2259                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2260                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2261                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2262                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2263                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2264                 key[8]  = dst->ip6.sin6_port;
2265                 key[9]  = src->ip6.sin6_port;
2266                 break;
2267         default:
2268                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2269                 return key;
2270         }
2271
2272         return key;
2273 }
2274
2275 /*
2276   called when we get a read event on the raw socket
2277  */
2278 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2279                                 uint16_t flags, void *private_data)
2280 {
2281         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2282         struct ctdb_killtcp_con *con;
2283         ctdb_sock_addr src, dst;
2284         uint32_t ack_seq, seq;
2285
2286         if (!(flags & EVENT_FD_READ)) {
2287                 return;
2288         }
2289
2290         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2291                                 killtcp->private_data,
2292                                 &src, &dst,
2293                                 &ack_seq, &seq) != 0) {
2294                 /* probably a non-tcp ACK packet */
2295                 return;
2296         }
2297
2298         /* check if we have this guy in our list of connections
2299            to kill
2300         */
2301         con = trbt_lookuparray32(killtcp->connections, 
2302                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2303         if (con == NULL) {
2304                 /* no this was some other packet we can just ignore */
2305                 return;
2306         }
2307
2308         /* This one has been tickled !
2309            now reset him and remove him from the list.
2310          */
2311         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2312                 ntohs(con->dst_addr.ip.sin_port),
2313                 ctdb_addr_to_str(&con->src_addr),
2314                 ntohs(con->src_addr.ip.sin_port)));
2315
2316         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2317         talloc_free(con);
2318 }
2319
2320
2321 /* when traversing the list of all tcp connections to send tickle acks to
2322    (so that we can capture the ack coming back and kill the connection
2323     by a RST)
2324    this callback is called for each connection we are currently trying to kill
2325 */
2326 static void tickle_connection_traverse(void *param, void *data)
2327 {
2328         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2329
2330         /* have tried too many times, just give up */
2331         if (con->count >= 5) {
2332                 /* can't delete in traverse: reparent to delete_cons */
2333                 talloc_steal(param, con);
2334                 return;
2335         }
2336
2337         /* othervise, try tickling it again */
2338         con->count++;
2339         ctdb_sys_send_tcp(
2340                 (ctdb_sock_addr *)&con->dst_addr,
2341                 (ctdb_sock_addr *)&con->src_addr,
2342                 0, 0, 0);
2343 }
2344
2345
2346 /* 
2347    called every second until all sentenced connections have been reset
2348  */
2349 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2350                                               struct timeval t, void *private_data)
2351 {
2352         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2353         void *delete_cons = talloc_new(NULL);
2354
2355         /* loop over all connections sending tickle ACKs */
2356         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2357
2358         /* now we've finished traverse, it's safe to do deletion. */
2359         talloc_free(delete_cons);
2360
2361         /* If there are no more connections to kill we can remove the
2362            entire killtcp structure
2363          */
2364         if ( (killtcp->connections == NULL) || 
2365              (killtcp->connections->root == NULL) ) {
2366                 talloc_free(killtcp);
2367                 return;
2368         }
2369
2370         /* try tickling them again in a seconds time
2371          */
2372         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2373                         ctdb_tickle_sentenced_connections, killtcp);
2374 }
2375
2376 /*
2377   destroy the killtcp structure
2378  */
2379 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2380 {
2381         if (killtcp->vnn) {
2382                 killtcp->vnn->killtcp = NULL;
2383         }
2384         return 0;
2385 }
2386
2387
/* insert callback for the killtcp tree: nothing fancy, the new
   connection structure ('parm') unconditionally replaces whatever was
   stored before ('data').

   the old structure is deliberately not freed here; per the original
   author's note it is talloc_stolen by the same tree node and goes away
   when the new data is deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
	(void)data;

	return parm;
}
2399
2400 /*
2401   add a tcp socket to the list of connections we want to RST
2402  */
2403 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2404                                        ctdb_sock_addr *s,
2405                                        ctdb_sock_addr *d)
2406 {
2407         ctdb_sock_addr src, dst;
2408         struct ctdb_kill_tcp *killtcp;
2409         struct ctdb_killtcp_con *con;
2410         struct ctdb_vnn *vnn;
2411
2412         ctdb_canonicalize_ip(s, &src);
2413         ctdb_canonicalize_ip(d, &dst);
2414
2415         vnn = find_public_ip_vnn(ctdb, &dst);
2416         if (vnn == NULL) {
2417                 vnn = find_public_ip_vnn(ctdb, &src);
2418         }
2419         if (vnn == NULL) {
2420                 /* if it is not a public ip   it could be our 'single ip' */
2421                 if (ctdb->single_ip_vnn) {
2422                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2423                                 vnn = ctdb->single_ip_vnn;
2424                         }
2425                 }
2426         }
2427         if (vnn == NULL) {
2428                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2429                 return -1;
2430         }
2431
2432         killtcp = vnn->killtcp;
2433         
2434         /* If this is the first connection to kill we must allocate
2435            a new structure
2436          */
2437         if (killtcp == NULL) {
2438                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2439                 CTDB_NO_MEMORY(ctdb, killtcp);
2440
2441                 killtcp->vnn         = vnn;
2442                 killtcp->ctdb        = ctdb;
2443                 killtcp->capture_fd  = -1;
2444                 killtcp->connections = trbt_create(killtcp, 0);
2445
2446                 vnn->killtcp         = killtcp;
2447                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2448         }
2449
2450
2451
2452         /* create a structure that describes this connection we want to
2453            RST and store it in killtcp->connections
2454         */
2455         con = talloc(killtcp, struct ctdb_killtcp_con);
2456         CTDB_NO_MEMORY(ctdb, con);
2457         con->src_addr = src;
2458         con->dst_addr = dst;
2459         con->count    = 0;
2460         con->killtcp  = killtcp;
2461
2462
2463         trbt_insertarray32_callback(killtcp->connections,
2464                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2465                         add_killtcp_callback, con);
2466
2467         /* 
2468            If we dont have a socket to listen on yet we must create it
2469          */
2470         if (killtcp->capture_fd == -1) {
2471                 const char *iface = ctdb_vnn_iface_string(vnn);
2472                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2473                 if (killtcp->capture_fd == -1) {
2474                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2475                                           "socket on iface '%s' for killtcp (%s)\n",
2476                                           iface, strerror(errno)));
2477                         goto failed;
2478                 }
2479         }
2480
2481
2482         if (killtcp->fde == NULL) {
2483                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2484                                             EVENT_FD_READ,
2485                                             capture_tcp_handler, killtcp);
2486                 tevent_fd_set_auto_close(killtcp->fde);
2487
2488                 /* We also need to set up some events to tickle all these connections
2489                    until they are all reset
2490                 */
2491                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2492                                 ctdb_tickle_sentenced_connections, killtcp);
2493         }
2494
2495         /* tickle him once now */
2496         ctdb_sys_send_tcp(
2497                 &con->dst_addr,
2498                 &con->src_addr,
2499                 0, 0, 0);
2500
2501         return 0;
2502
2503 failed:
2504         talloc_free(vnn->killtcp);
2505         vnn->killtcp = NULL;
2506         return -1;
2507 }
2508
2509 /*
2510   kill a TCP connection.
2511  */
2512 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2513 {
2514         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2515
2516         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2517 }
2518
2519 /*
2520   called by a daemon to inform us of the entire list of TCP tickles for
2521   a particular public address.
2522   this control should only be sent by the node that is currently serving
2523   that public address.
2524  */
2525 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2526 {
2527         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2528         struct ctdb_tcp_array *tcparray;
2529         struct ctdb_vnn *vnn;
2530
2531         /* We must at least have tickles.num or else we cant verify the size
2532            of the received data blob
2533          */
2534         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2535                                         tickles.connections)) {
2536                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2537                 return -1;
2538         }
2539
2540         /* verify that the size of data matches what we expect */
2541         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2542                                 tickles.connections)
2543                          + sizeof(struct ctdb_tcp_connection)
2544                                  * list->tickles.num) {
2545                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2546                 return -1;
2547         }       
2548
2549         vnn = find_public_ip_vnn(ctdb, &list->addr);
2550         if (vnn == NULL) {
2551                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2552                         ctdb_addr_to_str(&list->addr)));
2553
2554                 return 1;
2555         }
2556
2557         /* remove any old ticklelist we might have */
2558         talloc_free(vnn->tcp_array);
2559         vnn->tcp_array = NULL;
2560
2561         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2562         CTDB_NO_MEMORY(ctdb, tcparray);
2563
2564         tcparray->num = list->tickles.num;
2565
2566         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2567         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2568
2569         memcpy(tcparray->connections, &list->tickles.connections[0], 
2570                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2571
2572         /* We now have a new fresh tickle list array for this vnn */
2573         vnn->tcp_array = talloc_steal(vnn, tcparray);
2574         
2575         return 0;
2576 }
2577
2578 /*
2579   called to return the full list of tickles for the puclic address associated 
2580   with the provided vnn
2581  */
2582 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2583 {
2584         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2585         struct ctdb_control_tcp_tickle_list *list;
2586         struct ctdb_tcp_array *tcparray;
2587         int num;
2588         struct ctdb_vnn *vnn;
2589
2590         vnn = find_public_ip_vnn(ctdb, addr);
2591         if (vnn == NULL) {
2592                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2593                         ctdb_addr_to_str(addr)));
2594
2595                 return 1;
2596         }
2597
2598         tcparray = vnn->tcp_array;
2599         if (tcparray) {
2600                 num = tcparray->num;
2601         } else {
2602                 num = 0;
2603         }
2604
2605         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2606                                 tickles.connections)
2607                         + sizeof(struct ctdb_tcp_connection) * num;
2608
2609         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2610         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2611         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2612
2613         list->addr = *addr;
2614         list->tickles.num = num;
2615         if (num) {
2616                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2617                         sizeof(struct ctdb_tcp_connection) * num);
2618         }
2619
2620         return 0;
2621 }
2622
2623
2624 /*
2625   set the list of all tcp tickles for a public address
2626  */
2627 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2628                               struct timeval timeout, uint32_t destnode, 
2629                               ctdb_sock_addr *addr,
2630                               struct ctdb_tcp_array *tcparray)
2631 {
2632         int ret, num;
2633         TDB_DATA data;
2634         struct ctdb_control_tcp_tickle_list *list;
2635
2636         if (tcparray) {
2637                 num = tcparray->num;
2638         } else {
2639                 num = 0;
2640         }
2641
2642         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2643                                 tickles.connections) +
2644                         sizeof(struct ctdb_tcp_connection) * num;
2645         data.dptr = talloc_size(ctdb, data.dsize);
2646         CTDB_NO_MEMORY(ctdb, data.dptr);
2647
2648         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2649         list->addr = *addr;
2650         list->tickles.num = num;
2651         if (tcparray) {
2652                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2653         }
2654
2655         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2656                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2657                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2658         if (ret != 0) {
2659                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2660                 return -1;
2661         }
2662
2663         talloc_free(data.dptr);
2664
2665         return ret;
2666 }
2667
2668
2669 /*
2670   perform tickle updates if required
2671  */
2672 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2673                                 struct timed_event *te, 
2674                                 struct timeval t, void *private_data)
2675 {
2676         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2677         int ret;
2678         struct ctdb_vnn *vnn;
2679
2680         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2681                 /* we only send out updates for public addresses that 
2682                    we have taken over
2683                  */
2684                 if (ctdb->pnn != vnn->pnn) {
2685                         continue;
2686                 }
2687                 /* We only send out the updates if we need to */
2688                 if (!vnn->tcp_update_needed) {
2689                         continue;
2690                 }
2691                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2692                                 TAKEOVER_TIMEOUT(),
2693                                 CTDB_BROADCAST_CONNECTED,
2694                                 &vnn->public_address,
2695                                 vnn->tcp_array);
2696                 if (ret != 0) {
2697                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2698                                 ctdb_addr_to_str(&vnn->public_address)));
2699                 }
2700         }
2701
2702         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2703                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2704                              ctdb_update_tcp_tickles, ctdb);
2705 }               
2706         
2707
2708 /*
2709   start periodic update of tcp tickles
2710  */
2711 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2712 {
2713         ctdb->tickle_update_context = talloc_new(ctdb);
2714
2715         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2716                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2717                              ctdb_update_tcp_tickles, ctdb);
2718 }
2719
2720
2721
2722
2723 struct control_gratious_arp {
2724         struct ctdb_context *ctdb;
2725         ctdb_sock_addr addr;
2726         const char *iface;
2727         int count;
2728 };
2729
2730 /*
2731   send a control_gratuitous arp
2732  */
2733 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2734                                   struct timeval t, void *private_data)
2735 {
2736         int ret;
2737         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2738                                                         struct control_gratious_arp);
2739
2740         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2741         if (ret != 0) {
2742                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2743                                  arp->iface, strerror(errno)));
2744         }
2745
2746
2747         arp->count++;
2748         if (arp->count == CTDB_ARP_REPEAT) {
2749                 talloc_free(arp);
2750                 return;
2751         }
2752
2753         event_add_timed(arp->ctdb->ev, arp, 
2754                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2755                         send_gratious_arp, arp);
2756 }
2757
2758
2759 /*
2760   send a gratious arp 
2761  */
2762 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2763 {
2764         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2765         struct control_gratious_arp *arp;
2766
2767         /* verify the size of indata */
2768         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2769                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2770                                  (unsigned)indata.dsize, 
2771                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2772                 return -1;
2773         }
2774         if (indata.dsize != 
2775                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2776                 + gratious_arp->len ) ){
2777
2778                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2779                         "but should be %u bytes\n", 
2780                          (unsigned)indata.dsize, 
2781                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2782                 return -1;
2783         }
2784
2785
2786         arp = talloc(ctdb, struct control_gratious_arp);
2787         CTDB_NO_MEMORY(ctdb, arp);
2788
2789         arp->ctdb  = ctdb;
2790         arp->addr   = gratious_arp->addr;
2791         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2792         CTDB_NO_MEMORY(ctdb, arp->iface);
2793         arp->count = 0;
2794         
2795         event_add_timed(arp->ctdb->ev, arp, 
2796                         timeval_zero(), send_gratious_arp, arp);
2797
2798         return 0;
2799 }
2800
2801 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2802 {
2803         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2804         int ret;
2805
2806         /* verify the size of indata */
2807         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2808                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2809                 return -1;
2810         }
2811         if (indata.dsize != 
2812                 ( offsetof(struct ctdb_control_ip_iface, iface)
2813                 + pub->len ) ){
2814
2815                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2816                         "but should be %u bytes\n", 
2817                          (unsigned)indata.dsize, 
2818                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2819                 return -1;
2820         }
2821
2822         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2823
2824         if (ret != 0) {
2825                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2826                 return -1;
2827         }
2828
2829         return 0;
2830 }
2831
/*
  called when releaseip event finishes for del_public_address

  private_data is the talloc context that was handed to the event
  script call; all that is left to do is to free it.  ctdb and status
  are not looked at.
 */
static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
                                void *private_data)
{
        (void)ctdb;
        (void)status;

        talloc_free(private_data);
}
2840
2841 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2842 {
2843         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2844         struct ctdb_vnn *vnn;
2845         int ret;
2846
2847         /* verify the size of indata */
2848         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2849                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2850                 return -1;
2851         }
2852         if (indata.dsize != 
2853                 ( offsetof(struct ctdb_control_ip_iface, iface)
2854                 + pub->len ) ){
2855
2856                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2857                         "but should be %u bytes\n", 
2858                          (unsigned)indata.dsize, 
2859                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2860                 return -1;
2861         }
2862
2863         /* walk over all public addresses until we find a match */
2864         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2865                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2866                         TALLOC_CTX *mem_ctx;
2867
2868                         DLIST_REMOVE(ctdb->vnn, vnn);
2869                         if (vnn->iface == NULL) {
2870                                 talloc_free(vnn);
2871                                 return 0;
2872                         }
2873
2874                         mem_ctx = talloc_new(ctdb);
2875                         ret = ctdb_event_script_callback(ctdb, 
2876                                          mem_ctx, delete_ip_callback, mem_ctx,
2877                                          false,
2878                                          CTDB_EVENT_RELEASE_IP,
2879                                          "%s %s %u",
2880                                          ctdb_vnn_iface_string(vnn),
2881                                          ctdb_addr_to_str(&vnn->public_address),
2882                                          vnn->public_netmask_bits);
2883                         ctdb_vnn_unassign_iface(ctdb, vnn);
2884                         talloc_free(vnn);
2885                         if (ret != 0) {
2886                                 return -1;
2887                         }
2888                         return 0;
2889                 }
2890         }
2891
2892         return -1;
2893 }
2894
2895 /* This function is called from the recovery daemon to verify that a remote
2896    node has the expected ip allocation.
2897    This is verified against ctdb->ip_tree
2898 */
2899 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2900 {
2901         struct ctdb_public_ip_list *tmp_ip; 
2902         int i;
2903
2904         if (ctdb->ip_tree == NULL) {
2905                 /* dont know the expected allocation yet, assume remote node
2906                    is correct. */
2907                 return 0;
2908         }
2909
2910         if (ips == NULL) {
2911                 return 0;
2912         }
2913
2914         for (i=0; i<ips->num; i++) {
2915                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2916                 if (tmp_ip == NULL) {
2917                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2918                         return -1;
2919                 }
2920
2921                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2922                         continue;
2923                 }
2924
2925                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2926                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2927                         return -1;
2928                 }
2929         }
2930
2931         return 0;
2932 }
2933
2934 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2935 {
2936         struct ctdb_public_ip_list *tmp_ip; 
2937
2938         if (ctdb->ip_tree == NULL) {
2939                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2940                 return -1;
2941         }
2942
2943         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2944         if (tmp_ip == NULL) {
2945                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2946                 return -1;
2947         }
2948
2949         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2950         tmp_ip->pnn = ip->pnn;
2951
2952         return 0;
2953 }