eventscript: fix callback after free
[ctdb.git] / server / ctdb_takeover.c
1 /* 
2    ctdb ip takeover code
3
4    Copyright (C) Ronnie Sahlberg  2007
5    Copyright (C) Andrew Tridgell  2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 #include "includes.h"
21 #include "lib/tevent/tevent.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "lib/util/dlinklist.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "../common/rb_tree.h"
29
30
31 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
32
33 #define CTDB_ARP_INTERVAL 1
34 #define CTDB_ARP_REPEAT   3
35
/*
  a local network interface that public addresses can be assigned to
 */
struct ctdb_iface {
	/* linkage for the ctdb->ifaces list */
	struct ctdb_iface *prev;
	struct ctdb_iface *next;
	/* interface name; lookups compare it with strcmp() */
	const char *name;
	/* only interfaces with link_up set are picked for new assignments */
	bool link_up;
	/* number of public addresses currently assigned to this interface */
	uint32_t references;
};
42
43 static const char *ctdb_vnn_iface_string(const struct ctdb_vnn *vnn)
44 {
45         if (vnn->iface) {
46                 return vnn->iface->name;
47         }
48
49         return "__none__";
50 }
51
52 static int ctdb_add_local_iface(struct ctdb_context *ctdb, const char *iface)
53 {
54         struct ctdb_iface *i;
55
56         /* Verify that we dont have an entry for this ip yet */
57         for (i=ctdb->ifaces;i;i=i->next) {
58                 if (strcmp(i->name, iface) == 0) {
59                         return 0;
60                 }
61         }
62
63         /* create a new structure for this interface */
64         i = talloc_zero(ctdb, struct ctdb_iface);
65         CTDB_NO_MEMORY_FATAL(ctdb, i);
66         i->name = talloc_strdup(i, iface);
67         CTDB_NO_MEMORY(ctdb, i->name);
68         i->link_up = false;
69
70         DLIST_ADD(ctdb->ifaces, i);
71
72         return 0;
73 }
74
75 static struct ctdb_iface *ctdb_find_iface(struct ctdb_context *ctdb,
76                                           const char *iface)
77 {
78         struct ctdb_iface *i;
79
80         /* Verify that we dont have an entry for this ip yet */
81         for (i=ctdb->ifaces;i;i=i->next) {
82                 if (strcmp(i->name, iface) == 0) {
83                         return i;
84                 }
85         }
86
87         return NULL;
88 }
89
90 static struct ctdb_iface *ctdb_vnn_best_iface(struct ctdb_context *ctdb,
91                                               struct ctdb_vnn *vnn)
92 {
93         int i;
94         struct ctdb_iface *cur = NULL;
95         struct ctdb_iface *best = NULL;
96
97         for (i=0; vnn->ifaces[i]; i++) {
98
99                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
100                 if (cur == NULL) {
101                         continue;
102                 }
103
104                 if (!cur->link_up) {
105                         continue;
106                 }
107
108                 if (best == NULL) {
109                         best = cur;
110                         continue;
111                 }
112
113                 if (cur->references < best->references) {
114                         best = cur;
115                         continue;
116                 }
117         }
118
119         return best;
120 }
121
122 static int32_t ctdb_vnn_assign_iface(struct ctdb_context *ctdb,
123                                      struct ctdb_vnn *vnn)
124 {
125         struct ctdb_iface *best = NULL;
126
127         if (vnn->iface) {
128                 DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
129                                    "still assigned to iface '%s'\n",
130                                    ctdb_addr_to_str(&vnn->public_address),
131                                    ctdb_vnn_iface_string(vnn)));
132                 return 0;
133         }
134
135         best = ctdb_vnn_best_iface(ctdb, vnn);
136         if (best == NULL) {
137                 DEBUG(DEBUG_ERR, (__location__ " public address '%s' "
138                                   "cannot assign to iface any iface\n",
139                                   ctdb_addr_to_str(&vnn->public_address)));
140                 return -1;
141         }
142
143         vnn->iface = best;
144         best->references++;
145         vnn->pnn = ctdb->pnn;
146
147         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
148                            "now assigned to iface '%s' refs[%d]\n",
149                            ctdb_addr_to_str(&vnn->public_address),
150                            ctdb_vnn_iface_string(vnn),
151                            best->references));
152         return 0;
153 }
154
155 static void ctdb_vnn_unassign_iface(struct ctdb_context *ctdb,
156                                     struct ctdb_vnn *vnn)
157 {
158         DEBUG(DEBUG_INFO, (__location__ " public address '%s' "
159                            "now unassigned (old iface '%s' refs[%d])\n",
160                            ctdb_addr_to_str(&vnn->public_address),
161                            ctdb_vnn_iface_string(vnn),
162                            vnn->iface?vnn->iface->references:0));
163         if (vnn->iface) {
164                 vnn->iface->references--;
165         }
166         vnn->iface = NULL;
167         if (vnn->pnn == ctdb->pnn) {
168                 vnn->pnn = -1;
169         }
170 }
171
172 static bool ctdb_vnn_available(struct ctdb_context *ctdb,
173                                struct ctdb_vnn *vnn)
174 {
175         int i;
176
177         if (vnn->iface && vnn->iface->link_up) {
178                 return true;
179         }
180
181         for (i=0; vnn->ifaces[i]; i++) {
182                 struct ctdb_iface *cur;
183
184                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
185                 if (cur == NULL) {
186                         continue;
187                 }
188
189                 if (cur->link_up) {
190                         return true;
191                 }
192         }
193
194         return false;
195 }
196
/*
  state for the repeated gratuitous arp / tcp tickle announcement of
  a public address (see ctdb_control_send_arp)
 */
struct ctdb_takeover_arp {
	/* daemon context, used to reach the event context */
	struct ctdb_context *ctdb;
	/* number of announcements sent so far */
	uint32_t count;
	/* the address being announced */
	ctdb_sock_addr addr;
	/* tcp connections to send tickle acks for, may be NULL */
	struct ctdb_tcp_array *tcparray;
	/* the public address entry this announcement belongs to */
	struct ctdb_vnn *vnn;
};
204
205
206 /*
207   lists of tcp endpoints
208  */
209 struct ctdb_tcp_list {
210         struct ctdb_tcp_list *prev, *next;
211         struct ctdb_tcp_connection connection;
212 };
213
214 /*
215   list of clients to kill on IP release
216  */
217 struct ctdb_client_ip {
218         struct ctdb_client_ip *prev, *next;
219         struct ctdb_context *ctdb;
220         ctdb_sock_addr addr;
221         uint32_t client_id;
222 };
223
224
225 /*
226   send a gratuitous arp
227  */
228 static void ctdb_control_send_arp(struct event_context *ev, struct timed_event *te, 
229                                   struct timeval t, void *private_data)
230 {
231         struct ctdb_takeover_arp *arp = talloc_get_type(private_data, 
232                                                         struct ctdb_takeover_arp);
233         int i, ret;
234         struct ctdb_tcp_array *tcparray;
235         const char *iface = ctdb_vnn_iface_string(arp->vnn);
236
237         ret = ctdb_sys_send_arp(&arp->addr, iface);
238         if (ret != 0) {
239                 DEBUG(DEBUG_CRIT,(__location__ " sending of arp failed on iface '%s' (%s)\n",
240                                   iface, strerror(errno)));
241         }
242
243         tcparray = arp->tcparray;
244         if (tcparray) {
245                 for (i=0;i<tcparray->num;i++) {
246                         struct ctdb_tcp_connection *tcon;
247
248                         tcon = &tcparray->connections[i];
249                         DEBUG(DEBUG_INFO,("sending tcp tickle ack for %u->%s:%u\n",
250                                 (unsigned)ntohs(tcon->dst_addr.ip.sin_port), 
251                                 ctdb_addr_to_str(&tcon->src_addr),
252                                 (unsigned)ntohs(tcon->src_addr.ip.sin_port)));
253                         ret = ctdb_sys_send_tcp(
254                                 &tcon->src_addr, 
255                                 &tcon->dst_addr,
256                                 0, 0, 0);
257                         if (ret != 0) {
258                                 DEBUG(DEBUG_CRIT,(__location__ " Failed to send tcp tickle ack for %s\n",
259                                         ctdb_addr_to_str(&tcon->src_addr)));
260                         }
261                 }
262         }
263
264         arp->count++;
265
266         if (arp->count == CTDB_ARP_REPEAT) {
267                 talloc_free(arp);
268                 return;
269         }
270
271         event_add_timed(arp->ctdb->ev, arp->vnn->takeover_ctx, 
272                         timeval_current_ofs(CTDB_ARP_INTERVAL, 100000), 
273                         ctdb_control_send_arp, arp);
274 }
275
276 static int32_t ctdb_announce_vnn_iface(struct ctdb_context *ctdb,
277                                        struct ctdb_vnn *vnn)
278 {
279         struct ctdb_takeover_arp *arp;
280         struct ctdb_tcp_array *tcparray;
281
282         if (!vnn->takeover_ctx) {
283                 vnn->takeover_ctx = talloc_new(vnn);
284                 if (!vnn->takeover_ctx) {
285                         return -1;
286                 }
287         }
288
289         arp = talloc_zero(vnn->takeover_ctx, struct ctdb_takeover_arp);
290         if (!arp) {
291                 return -1;
292         }
293
294         arp->ctdb = ctdb;
295         arp->addr = vnn->public_address;
296         arp->vnn  = vnn;
297
298         tcparray = vnn->tcp_array;
299         if (tcparray) {
300                 /* add all of the known tcp connections for this IP to the
301                    list of tcp connections to send tickle acks for */
302                 arp->tcparray = talloc_steal(arp, tcparray);
303
304                 vnn->tcp_array = NULL;
305                 vnn->tcp_update_needed = true;
306         }
307
308         event_add_timed(arp->ctdb->ev, vnn->takeover_ctx,
309                         timeval_zero(), ctdb_control_send_arp, arp);
310
311         return 0;
312 }
313
/*
  state carried from ctdb_control_release_ip() to release_ip_callback()
 */
struct takeover_callback_state {
	/* the control request that is answered from the callback */
	struct ctdb_req_control *c;
	/* copy of the address being released */
	ctdb_sock_addr *addr;
	/* the public address entry being released */
	struct ctdb_vnn *vnn;
};
319
/*
  state carried from ctdb_do_takeip() to ctdb_do_takeip_callback()
 */
struct ctdb_do_takeip_state {
	/* the control request that is answered from the callback */
	struct ctdb_req_control *c;
	/* the public address entry being taken over */
	struct ctdb_vnn *vnn;
};
324
325 /*
326   called when takeip event finishes
327  */
328 static void ctdb_do_takeip_callback(struct ctdb_context *ctdb, int status,
329                                     void *private_data)
330 {
331         struct ctdb_do_takeip_state *state =
332                 talloc_get_type(private_data, struct ctdb_do_takeip_state);
333         int32_t ret;
334         TDB_DATA data;
335
336         if (status != 0) {
337                 struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
338         
339                 if (status == -ETIME) {
340                         ctdb_ban_self(ctdb);
341                 }
342                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
343                                  ctdb_addr_to_str(&state->vnn->public_address),
344                                  ctdb_vnn_iface_string(state->vnn)));
345                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
346
347                 node->flags |= NODE_FLAGS_UNHEALTHY;
348                 talloc_free(state);
349                 return;
350         }
351
352         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
353         if (ret != 0) {
354                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
355                 talloc_free(state);
356                 return;
357         }
358
359         data.dptr  = (uint8_t *)ctdb_addr_to_str(&state->vnn->public_address);
360         data.dsize = strlen((char *)data.dptr) + 1;
361         DEBUG(DEBUG_INFO,(__location__ " sending TAKE_IP for '%s'\n", data.dptr));
362
363         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_TAKE_IP, data);
364
365
366         /* the control succeeded */
367         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
368         talloc_free(state);
369         return;
370 }
371
372 /*
373   take over an ip address
374  */
375 static int32_t ctdb_do_takeip(struct ctdb_context *ctdb,
376                               struct ctdb_req_control *c,
377                               struct ctdb_vnn *vnn)
378 {
379         int ret;
380         struct ctdb_do_takeip_state *state;
381
382         ret = ctdb_vnn_assign_iface(ctdb, vnn);
383         if (ret != 0) {
384                 DEBUG(DEBUG_ERR,("Takeover of IP %s/%u failed to "
385                                  "assin a usable interface\n",
386                                  ctdb_addr_to_str(&vnn->public_address),
387                                  vnn->public_netmask_bits));
388                 return -1;
389         }
390
391         state = talloc(vnn, struct ctdb_do_takeip_state);
392         CTDB_NO_MEMORY(ctdb, state);
393
394         state->c = talloc_steal(ctdb, c);
395         state->vnn   = vnn;
396
397         DEBUG(DEBUG_NOTICE,("Takeover of IP %s/%u on interface %s\n",
398                             ctdb_addr_to_str(&vnn->public_address),
399                             vnn->public_netmask_bits,
400                             ctdb_vnn_iface_string(vnn)));
401
402         ret = ctdb_event_script_callback(ctdb,
403                                          state,
404                                          ctdb_do_takeip_callback,
405                                          state,
406                                          false,
407                                          CTDB_EVENT_TAKE_IP,
408                                          "%s %s %u",
409                                          ctdb_vnn_iface_string(vnn),
410                                          ctdb_addr_to_str(&vnn->public_address),
411                                          vnn->public_netmask_bits);
412
413         if (ret != 0) {
414                 DEBUG(DEBUG_ERR,(__location__ " Failed to takeover IP %s on interface %s\n",
415                         ctdb_addr_to_str(&vnn->public_address),
416                         ctdb_vnn_iface_string(vnn)));
417                 talloc_free(state);
418                 return -1;
419         }
420
421         return 0;
422 }
423
/*
  state carried from ctdb_do_updateip() to ctdb_do_updateip_callback()
 */
struct ctdb_do_updateip_state {
	/* the control request that is answered from the callback */
	struct ctdb_req_control *c;
	/* interface the address was on before the move; restored on failure */
	struct ctdb_iface *old;
	/* the public address entry being moved */
	struct ctdb_vnn *vnn;
};
429
430 /*
431   called when updateip event finishes
432  */
433 static void ctdb_do_updateip_callback(struct ctdb_context *ctdb, int status,
434                                       void *private_data)
435 {
436         struct ctdb_do_updateip_state *state =
437                 talloc_get_type(private_data, struct ctdb_do_updateip_state);
438         int32_t ret;
439
440         if (status != 0) {
441                 if (status == -ETIME) {
442                         ctdb_ban_self(ctdb);
443                 }
444                 DEBUG(DEBUG_ERR,(__location__ " Failed to move IP %s from interface %s to %s\n",
445                         ctdb_addr_to_str(&state->vnn->public_address),
446                         state->old->name,
447                         ctdb_vnn_iface_string(state->vnn)));
448
449                 /*
450                  * All we can do is reset the old interface
451                  * and let the next run fix it
452                  */
453                 ctdb_vnn_unassign_iface(ctdb, state->vnn);
454                 state->vnn->iface = state->old;
455                 state->vnn->iface->references++;
456
457                 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
458                 talloc_free(state);
459                 return;
460         }
461
462         ret = ctdb_announce_vnn_iface(ctdb, state->vnn);
463         if (ret != 0) {
464                 ctdb_request_control_reply(ctdb, state->c, NULL, -1, NULL);
465                 talloc_free(state);
466                 return;
467         }
468
469         /* the control succeeded */
470         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
471         talloc_free(state);
472         return;
473 }
474
475 /*
476   update (move) an ip address
477  */
478 static int32_t ctdb_do_updateip(struct ctdb_context *ctdb,
479                                 struct ctdb_req_control *c,
480                                 struct ctdb_vnn *vnn)
481 {
482         int ret;
483         struct ctdb_do_updateip_state *state;
484         struct ctdb_iface *old = vnn->iface;
485         char *new_name;
486
487         ctdb_vnn_unassign_iface(ctdb, vnn);
488         ret = ctdb_vnn_assign_iface(ctdb, vnn);
489         if (ret != 0) {
490                 DEBUG(DEBUG_ERR,("update of IP %s/%u failed to "
491                                  "assin a usable interface (old iface '%s')\n",
492                                  ctdb_addr_to_str(&vnn->public_address),
493                                  vnn->public_netmask_bits,
494                                  old->name));
495                 return -1;
496         }
497
498         new_name = ctdb_vnn_iface_string(vnn);
499         if (old->name != NULL && new_name != NULL && !strcmp(old->name, new_name)) {
500                 /* A benign update from one interface onto itself.
501                  * no need to run the eventscripts in this case, just return
502                  * success.
503                  */
504                 ctdb_request_control_reply(ctdb, c, NULL, 0, NULL);
505                 return 0;
506         }
507
508         state = talloc(vnn, struct ctdb_do_updateip_state);
509         CTDB_NO_MEMORY(ctdb, state);
510
511         state->c = talloc_steal(ctdb, c);
512         state->old = old;
513         state->vnn = vnn;
514
515         DEBUG(DEBUG_NOTICE,("Update of IP %s/%u from "
516                             "interface %s to %s\n",
517                             ctdb_addr_to_str(&vnn->public_address),
518                             vnn->public_netmask_bits,
519                             old->name,
520                             new_name));
521
522         ret = ctdb_event_script_callback(ctdb,
523                                          state,
524                                          ctdb_do_updateip_callback,
525                                          state,
526                                          false,
527                                          CTDB_EVENT_UPDATE_IP,
528                                          "%s %s %s %u",
529                                          state->old->name,
530                                          new_name,
531                                          ctdb_addr_to_str(&vnn->public_address),
532                                          vnn->public_netmask_bits);
533         if (ret != 0) {
534                 DEBUG(DEBUG_ERR,(__location__ " Failed update IP %s from interface %s to %s\n",
535                                  ctdb_addr_to_str(&vnn->public_address),
536                                  old->name, new_name));
537                 talloc_free(state);
538                 return -1;
539         }
540
541         return 0;
542 }
543
544 /*
545   Find the vnn of the node that has a public ip address
546   returns -1 if the address is not known as a public address
547  */
548 static struct ctdb_vnn *find_public_ip_vnn(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
549 {
550         struct ctdb_vnn *vnn;
551
552         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
553                 if (ctdb_same_ip(&vnn->public_address, addr)) {
554                         return vnn;
555                 }
556         }
557
558         return NULL;
559 }
560
561 /*
562   take over an ip address
563  */
564 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
565                                  struct ctdb_req_control *c,
566                                  TDB_DATA indata,
567                                  bool *async_reply)
568 {
569         int ret;
570         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
571         struct ctdb_vnn *vnn;
572         bool have_ip = false;
573         bool do_updateip = false;
574         bool do_takeip = false;
575         struct ctdb_iface *best_iface = NULL;
576
577         if (pip->pnn != ctdb->pnn) {
578                 DEBUG(DEBUG_ERR,(__location__" takeoverip called for an ip '%s' "
579                                  "with pnn %d, but we're node %d\n",
580                                  ctdb_addr_to_str(&pip->addr),
581                                  pip->pnn, ctdb->pnn));
582                 return -1;
583         }
584
585         /* update out vnn list */
586         vnn = find_public_ip_vnn(ctdb, &pip->addr);
587         if (vnn == NULL) {
588                 DEBUG(DEBUG_INFO,("takeoverip called for an ip '%s' that is not a public address\n",
589                         ctdb_addr_to_str(&pip->addr)));
590                 return 0;
591         }
592
593         have_ip = ctdb_sys_have_ip(&pip->addr);
594         best_iface = ctdb_vnn_best_iface(ctdb, vnn);
595         if (best_iface == NULL) {
596                 DEBUG(DEBUG_ERR,("takeoverip of IP %s/%u failed to find"
597                                  "a usable interface (old %s, have_ip %d)\n",
598                                  ctdb_addr_to_str(&vnn->public_address),
599                                  vnn->public_netmask_bits,
600                                  ctdb_vnn_iface_string(vnn),
601                                  have_ip));
602                 return -1;
603         }
604
605         if (vnn->iface == NULL && vnn->pnn == -1 && have_ip && best_iface != NULL) {
606                 DEBUG(DEBUG_ERR,("Taking over newly created ip\n"));
607                 have_ip = false;
608         }
609
610         if (vnn->iface == NULL && have_ip) {
611                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
612                                   "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
613                                  ctdb_addr_to_str(&vnn->public_address)));
614                 return 0;
615         }
616
617         if (vnn->pnn != ctdb->pnn && have_ip && vnn->pnn != -1) {
618                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
619                                   "and we have it on iface[%s], but it was assigned to node %d"
620                                   "and we are node %d, banning ourself\n",
621                                  ctdb_addr_to_str(&vnn->public_address),
622                                  ctdb_vnn_iface_string(vnn), vnn->pnn, ctdb->pnn));
623                 ctdb_ban_self(ctdb);
624                 return -1;
625         }
626
627         if (vnn->pnn == -1 && have_ip) {
628                 vnn->pnn = ctdb->pnn;
629                 DEBUG(DEBUG_CRIT,(__location__ " takeoverip of IP %s is known to the kernel, "
630                                   "and we already have it on iface[%s], update local daemon\n",
631                                  ctdb_addr_to_str(&vnn->public_address),
632                                   ctdb_vnn_iface_string(vnn)));
633                 return 0;
634         }
635
636         if (vnn->iface) {
637                 if (vnn->iface->link_up) {
638                         /* only move when the rebalance gains something */
639                         if (vnn->iface->references > (best_iface->references + 1)) {
640                                 do_updateip = true;
641                         }
642                 } else if (vnn->iface != best_iface) {
643                         do_updateip = true;
644                 }
645         }
646
647         if (!have_ip) {
648                 if (do_updateip) {
649                         ctdb_vnn_unassign_iface(ctdb, vnn);
650                         do_updateip = false;
651                 }
652                 do_takeip = true;
653         }
654
655         if (do_takeip) {
656                 ret = ctdb_do_takeip(ctdb, c, vnn);
657                 if (ret != 0) {
658                         return -1;
659                 }
660         } else if (do_updateip) {
661                 ret = ctdb_do_updateip(ctdb, c, vnn);
662                 if (ret != 0) {
663                         return -1;
664                 }
665         } else {
666                 /*
667                  * The interface is up and the kernel known the ip
668                  * => do nothing
669                  */
670                 DEBUG(DEBUG_INFO,("Redundant takeover of IP %s/%u on interface %s (ip already held)\n",
671                         ctdb_addr_to_str(&pip->addr),
672                         vnn->public_netmask_bits,
673                         ctdb_vnn_iface_string(vnn)));
674                 return 0;
675         }
676
677         /* tell ctdb_control.c that we will be replying asynchronously */
678         *async_reply = true;
679
680         return 0;
681 }
682
683 /*
684   takeover an ip address old v4 style
685  */
686 int32_t ctdb_control_takeover_ipv4(struct ctdb_context *ctdb, 
687                                 struct ctdb_req_control *c,
688                                 TDB_DATA indata, 
689                                 bool *async_reply)
690 {
691         TDB_DATA data;
692         
693         data.dsize = sizeof(struct ctdb_public_ip);
694         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
695         CTDB_NO_MEMORY(ctdb, data.dptr);
696         
697         memcpy(data.dptr, indata.dptr, indata.dsize);
698         return ctdb_control_takeover_ip(ctdb, c, data, async_reply);
699 }
700
701 /*
702   kill any clients that are registered with a IP that is being released
703  */
704 static void release_kill_clients(struct ctdb_context *ctdb, ctdb_sock_addr *addr)
705 {
706         struct ctdb_client_ip *ip;
707
708         DEBUG(DEBUG_INFO,("release_kill_clients for ip %s\n",
709                 ctdb_addr_to_str(addr)));
710
711         for (ip=ctdb->client_ip_list; ip; ip=ip->next) {
712                 ctdb_sock_addr tmp_addr;
713
714                 tmp_addr = ip->addr;
715                 DEBUG(DEBUG_INFO,("checking for client %u with IP %s\n", 
716                         ip->client_id,
717                         ctdb_addr_to_str(&ip->addr)));
718
719                 if (ctdb_same_ip(&tmp_addr, addr)) {
720                         struct ctdb_client *client = ctdb_reqid_find(ctdb, 
721                                                                      ip->client_id, 
722                                                                      struct ctdb_client);
723                         DEBUG(DEBUG_INFO,("matched client %u with IP %s and pid %u\n", 
724                                 ip->client_id,
725                                 ctdb_addr_to_str(&ip->addr),
726                                 client->pid));
727
728                         if (client->pid != 0) {
729                                 DEBUG(DEBUG_INFO,(__location__ " Killing client pid %u for IP %s on client_id %u\n",
730                                         (unsigned)client->pid,
731                                         ctdb_addr_to_str(addr),
732                                         ip->client_id));
733                                 kill(client->pid, SIGKILL);
734                         }
735                 }
736         }
737 }
738
739 /*
740   called when releaseip event finishes
741  */
742 static void release_ip_callback(struct ctdb_context *ctdb, int status, 
743                                 void *private_data)
744 {
745         struct takeover_callback_state *state = 
746                 talloc_get_type(private_data, struct takeover_callback_state);
747         TDB_DATA data;
748
749         if (status == -ETIME) {
750                 ctdb_ban_self(ctdb);
751         }
752
753         /* send a message to all clients of this node telling them
754            that the cluster has been reconfigured and they should
755            release any sockets on this IP */
756         data.dptr = (uint8_t *)talloc_strdup(state, ctdb_addr_to_str(state->addr));
757         CTDB_NO_MEMORY_VOID(ctdb, data.dptr);
758         data.dsize = strlen((char *)data.dptr)+1;
759
760         DEBUG(DEBUG_INFO,(__location__ " sending RELEASE_IP for '%s'\n", data.dptr));
761
762         ctdb_daemon_send_message(ctdb, ctdb->pnn, CTDB_SRVID_RELEASE_IP, data);
763
764         /* kill clients that have registered with this IP */
765         release_kill_clients(ctdb, state->addr);
766
767         ctdb_vnn_unassign_iface(ctdb, state->vnn);
768
769         /* the control succeeded */
770         ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
771         talloc_free(state);
772 }
773
774 /*
775   release an ip address
776  */
777 int32_t ctdb_control_release_ip(struct ctdb_context *ctdb, 
778                                 struct ctdb_req_control *c,
779                                 TDB_DATA indata, 
780                                 bool *async_reply)
781 {
782         int ret;
783         struct takeover_callback_state *state;
784         struct ctdb_public_ip *pip = (struct ctdb_public_ip *)indata.dptr;
785         struct ctdb_vnn *vnn;
786
787         /* update our vnn list */
788         vnn = find_public_ip_vnn(ctdb, &pip->addr);
789         if (vnn == NULL) {
790                 DEBUG(DEBUG_INFO,("releaseip called for an ip '%s' that is not a public address\n",
791                         ctdb_addr_to_str(&pip->addr)));
792                 return 0;
793         }
794         vnn->pnn = pip->pnn;
795
796         /* stop any previous arps */
797         talloc_free(vnn->takeover_ctx);
798         vnn->takeover_ctx = NULL;
799
800         if (!ctdb_sys_have_ip(&pip->addr)) {
801                 DEBUG(DEBUG_DEBUG,("Redundant release of IP %s/%u on interface %s (ip not held)\n", 
802                         ctdb_addr_to_str(&pip->addr),
803                         vnn->public_netmask_bits, 
804                         ctdb_vnn_iface_string(vnn)));
805                 ctdb_vnn_unassign_iface(ctdb, vnn);
806                 return 0;
807         }
808
809         if (vnn->iface == NULL) {
810                 DEBUG(DEBUG_ERR,(__location__ " release_ip of IP %s is known to the kernel, "
811                                  "but we have no interface assigned, has someone manually configured it? Ignore for now.\n",
812                                  ctdb_addr_to_str(&vnn->public_address)));
813                 return 0;
814         }
815
816         DEBUG(DEBUG_NOTICE,("Release of IP %s/%u on interface %s  node:%d\n",
817                 ctdb_addr_to_str(&pip->addr),
818                 vnn->public_netmask_bits, 
819                 ctdb_vnn_iface_string(vnn),
820                 pip->pnn));
821
822         state = talloc(ctdb, struct takeover_callback_state);
823         CTDB_NO_MEMORY(ctdb, state);
824
825         state->c = talloc_steal(state, c);
826         state->addr = talloc(state, ctdb_sock_addr);       
827         CTDB_NO_MEMORY(ctdb, state->addr);
828         *state->addr = pip->addr;
829         state->vnn   = vnn;
830
831         ret = ctdb_event_script_callback(ctdb, 
832                                          state, release_ip_callback, state,
833                                          false,
834                                          CTDB_EVENT_RELEASE_IP,
835                                          "%s %s %u",
836                                          ctdb_vnn_iface_string(vnn),
837                                          ctdb_addr_to_str(&pip->addr),
838                                          vnn->public_netmask_bits);
839         if (ret != 0) {
840                 DEBUG(DEBUG_ERR,(__location__ " Failed to release IP %s on interface %s\n",
841                         ctdb_addr_to_str(&pip->addr),
842                         ctdb_vnn_iface_string(vnn)));
843                 talloc_free(state);
844                 return -1;
845         }
846
847         /* tell the control that we will be reply asynchronously */
848         *async_reply = true;
849         return 0;
850 }
851
852 /*
853   release an ip address old v4 style
854  */
855 int32_t ctdb_control_release_ipv4(struct ctdb_context *ctdb, 
856                                 struct ctdb_req_control *c,
857                                 TDB_DATA indata, 
858                                 bool *async_reply)
859 {
860         TDB_DATA data;
861         
862         data.dsize = sizeof(struct ctdb_public_ip);
863         data.dptr  = (uint8_t *)talloc_zero(c, struct ctdb_public_ip);
864         CTDB_NO_MEMORY(ctdb, data.dptr);
865         
866         memcpy(data.dptr, indata.dptr, indata.dsize);
867         return ctdb_control_release_ip(ctdb, c, data, async_reply);
868 }
869
870
871 static int ctdb_add_public_address(struct ctdb_context *ctdb,
872                                    ctdb_sock_addr *addr,
873                                    unsigned mask, const char *ifaces)
874 {
875         struct ctdb_vnn      *vnn;
876         uint32_t num = 0;
877         char *tmp;
878         const char *iface;
879         int i;
880         int ret;
881
882         /* Verify that we dont have an entry for this ip yet */
883         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
884                 if (ctdb_same_sockaddr(addr, &vnn->public_address)) {
885                         DEBUG(DEBUG_CRIT,("Same ip '%s' specified multiple times in the public address list \n", 
886                                 ctdb_addr_to_str(addr)));
887                         return -1;
888                 }               
889         }
890
891         /* create a new vnn structure for this ip address */
892         vnn = talloc_zero(ctdb, struct ctdb_vnn);
893         CTDB_NO_MEMORY_FATAL(ctdb, vnn);
894         vnn->ifaces = talloc_array(vnn, const char *, num + 2);
895         tmp = talloc_strdup(vnn, ifaces);
896         CTDB_NO_MEMORY_FATAL(ctdb, tmp);
897         for (iface = strtok(tmp, ","); iface; iface = strtok(NULL, ",")) {
898                 vnn->ifaces = talloc_realloc(vnn, vnn->ifaces, const char *, num + 2);
899                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces);
900                 vnn->ifaces[num] = talloc_strdup(vnn, iface);
901                 CTDB_NO_MEMORY_FATAL(ctdb, vnn->ifaces[num]);
902                 num++;
903         }
904         talloc_free(tmp);
905         vnn->ifaces[num] = NULL;
906         vnn->public_address      = *addr;
907         vnn->public_netmask_bits = mask;
908         vnn->pnn                 = -1;
909         if (ctdb_sys_have_ip(addr)) {
910                 DEBUG(DEBUG_ERR,("We are already hosting public address '%s'. setting PNN to ourself:%d\n", ctdb_addr_to_str(addr), ctdb->pnn));
911                 vnn->pnn = ctdb->pnn;
912         }
913
914         for (i=0; vnn->ifaces[i]; i++) {
915                 ret = ctdb_add_local_iface(ctdb, vnn->ifaces[i]);
916                 if (ret != 0) {
917                         DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
918                                            "for public_address[%s]\n",
919                                            vnn->ifaces[i], ctdb_addr_to_str(addr)));
920                         talloc_free(vnn);
921                         return -1;
922                 }
923                 if (i == 0) {
924                         vnn->iface = ctdb_find_iface(ctdb, vnn->ifaces[i]);
925                 }
926         }
927
928         DLIST_ADD(ctdb->vnn, vnn);
929
930         return 0;
931 }
932
933 /*
934   setup the event script directory
935 */
936 int ctdb_set_event_script_dir(struct ctdb_context *ctdb, const char *script_dir)
937 {
938         ctdb->event_script_dir = talloc_strdup(ctdb, script_dir);
939         CTDB_NO_MEMORY(ctdb, ctdb->event_script_dir);
940         return 0;
941 }
942
943 /*
944   setup the public address lists from a file
945 */
946 int ctdb_set_public_addresses(struct ctdb_context *ctdb, const char *alist)
947 {
948         char **lines;
949         int nlines;
950         int i;
951
952         lines = file_lines_load(alist, &nlines, ctdb);
953         if (lines == NULL) {
954                 ctdb_set_error(ctdb, "Failed to load public address list '%s'\n", alist);
955                 return -1;
956         }
957         while (nlines > 0 && strcmp(lines[nlines-1], "") == 0) {
958                 nlines--;
959         }
960
961         for (i=0;i<nlines;i++) {
962                 unsigned mask;
963                 ctdb_sock_addr addr;
964                 const char *addrstr;
965                 const char *ifaces;
966                 char *tok, *line;
967
968                 line = lines[i];
969                 while ((*line == ' ') || (*line == '\t')) {
970                         line++;
971                 }
972                 if (*line == '#') {
973                         continue;
974                 }
975                 if (strcmp(line, "") == 0) {
976                         continue;
977                 }
978                 tok = strtok(line, " \t");
979                 addrstr = tok;
980                 tok = strtok(NULL, " \t");
981                 if (tok == NULL) {
982                         if (NULL == ctdb->default_public_interface) {
983                                 DEBUG(DEBUG_CRIT,("No default public interface and no interface specified at line %u of public address list\n",
984                                          i+1));
985                                 talloc_free(lines);
986                                 return -1;
987                         }
988                         ifaces = ctdb->default_public_interface;
989                 } else {
990                         ifaces = tok;
991                 }
992
993                 if (!addrstr || !parse_ip_mask(addrstr, ifaces, &addr, &mask)) {
994                         DEBUG(DEBUG_CRIT,("Badly formed line %u in public address list\n", i+1));
995                         talloc_free(lines);
996                         return -1;
997                 }
998                 if (ctdb_add_public_address(ctdb, &addr, mask, ifaces)) {
999                         DEBUG(DEBUG_CRIT,("Failed to add line %u to the public address list\n", i+1));
1000                         talloc_free(lines);
1001                         return -1;
1002                 }
1003         }
1004
1005         talloc_free(lines);
1006         return 0;
1007 }
1008
1009 int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
1010                               const char *iface,
1011                               const char *ip)
1012 {
1013         struct ctdb_vnn *svnn;
1014         struct ctdb_iface *cur = NULL;
1015         bool ok;
1016         int ret;
1017
1018         svnn = talloc_zero(ctdb, struct ctdb_vnn);
1019         CTDB_NO_MEMORY(ctdb, svnn);
1020
1021         svnn->ifaces = talloc_array(svnn, const char *, 2);
1022         CTDB_NO_MEMORY(ctdb, svnn->ifaces);
1023         svnn->ifaces[0] = talloc_strdup(svnn->ifaces, iface);
1024         CTDB_NO_MEMORY(ctdb, svnn->ifaces[0]);
1025         svnn->ifaces[1] = NULL;
1026
1027         ok = parse_ip(ip, iface, 0, &svnn->public_address);
1028         if (!ok) {
1029                 talloc_free(svnn);
1030                 return -1;
1031         }
1032
1033         ret = ctdb_add_local_iface(ctdb, svnn->ifaces[0]);
1034         if (ret != 0) {
1035                 DEBUG(DEBUG_CRIT, (__location__ " failed to add iface[%s] "
1036                                    "for single_ip[%s]\n",
1037                                    svnn->ifaces[0],
1038                                    ctdb_addr_to_str(&svnn->public_address)));
1039                 talloc_free(svnn);
1040                 return -1;
1041         }
1042
1043         /* assume the single public ip interface is initially "good" */
1044         cur = ctdb_find_iface(ctdb, iface);
1045         if (cur == NULL) {
1046                 DEBUG(DEBUG_CRIT,("Can not find public interface %s used by --single-public-ip", iface));
1047                 return -1;
1048         }
1049         cur->link_up = true;
1050
1051         ret = ctdb_vnn_assign_iface(ctdb, svnn);
1052         if (ret != 0) {
1053                 talloc_free(svnn);
1054                 return -1;
1055         }
1056
1057         ctdb->single_ip_vnn = svnn;
1058         return 0;
1059 }
1060
/*
  One entry of the singly linked list of public addresses that the
  takeover code builds and walks when deciding which node serves
  which address.
 */
struct ctdb_public_ip_list {
        /* next entry in the list, NULL at the end */
        struct ctdb_public_ip_list *next;
        /* node chosen to serve 'addr'; the takeover code compares this
           against -1 to mean "no node assigned" */
        uint32_t pnn;
        /* the public address itself */
        ctdb_sock_addr addr;
};
1066
1067
1068 /* Given a physical node, return the number of
1069    public addresses that is currently assigned to this node.
1070 */
1071 static int node_ip_coverage(struct ctdb_context *ctdb, 
1072         int32_t pnn,
1073         struct ctdb_public_ip_list *ips)
1074 {
1075         int num=0;
1076
1077         for (;ips;ips=ips->next) {
1078                 if (ips->pnn == pnn) {
1079                         num++;
1080                 }
1081         }
1082         return num;
1083 }
1084
1085
1086 /* Check if this is a public ip known to the node, i.e. can that
1087    node takeover this ip ?
1088 */
1089 static int can_node_serve_ip(struct ctdb_context *ctdb, int32_t pnn, 
1090                 struct ctdb_public_ip_list *ip)
1091 {
1092         struct ctdb_all_public_ips *public_ips;
1093         int i;
1094
1095         public_ips = ctdb->nodes[pnn]->available_public_ips;
1096
1097         if (public_ips == NULL) {
1098                 return -1;
1099         }
1100
1101         for (i=0;i<public_ips->num;i++) {
1102                 if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
1103                         /* yes, this node can serve this public ip */
1104                         return 0;
1105                 }
1106         }
1107
1108         return -1;
1109 }
1110
1111
1112 /* search the node lists list for a node to takeover this ip.
1113    pick the node that currently are serving the least number of ips
1114    so that the ips get spread out evenly.
1115 */
1116 static int find_takeover_node(struct ctdb_context *ctdb, 
1117                 struct ctdb_node_map *nodemap, uint32_t mask, 
1118                 struct ctdb_public_ip_list *ip,
1119                 struct ctdb_public_ip_list *all_ips)
1120 {
1121         int pnn, min=0, num;
1122         int i;
1123
1124         pnn    = -1;
1125         for (i=0;i<nodemap->num;i++) {
1126                 if (nodemap->nodes[i].flags & mask) {
1127                         /* This node is not healty and can not be used to serve
1128                            a public address 
1129                         */
1130                         continue;
1131                 }
1132
1133                 /* verify that this node can serve this ip */
1134                 if (can_node_serve_ip(ctdb, i, ip)) {
1135                         /* no it couldnt   so skip to the next node */
1136                         continue;
1137                 }
1138
1139                 num = node_ip_coverage(ctdb, i, all_ips);
1140                 /* was this the first node we checked ? */
1141                 if (pnn == -1) {
1142                         pnn = i;
1143                         min  = num;
1144                 } else {
1145                         if (num < min) {
1146                                 pnn = i;
1147                                 min  = num;
1148                         }
1149                 }
1150         }       
1151         if (pnn == -1) {
1152                 DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
1153                         ctdb_addr_to_str(&ip->addr)));
1154
1155                 return -1;
1156         }
1157
1158         ip->pnn = pnn;
1159         return 0;
1160 }
1161
1162 #define IP_KEYLEN       4
1163 static uint32_t *ip_key(ctdb_sock_addr *ip)
1164 {
1165         static uint32_t key[IP_KEYLEN];
1166
1167         bzero(key, sizeof(key));
1168
1169         switch (ip->sa.sa_family) {
1170         case AF_INET:
1171                 key[3]  = htonl(ip->ip.sin_addr.s_addr);
1172                 break;
1173         case AF_INET6:
1174                 key[0]  = htonl(ip->ip6.sin6_addr.s6_addr32[0]);
1175                 key[1]  = htonl(ip->ip6.sin6_addr.s6_addr32[1]);
1176                 key[2]  = htonl(ip->ip6.sin6_addr.s6_addr32[2]);
1177                 key[3]  = htonl(ip->ip6.sin6_addr.s6_addr32[3]);
1178                 break;
1179         default:
1180                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
1181                 return key;
1182         }
1183
1184         return key;
1185 }
1186
1187 static void *add_ip_callback(void *parm, void *data)
1188 {
1189         struct ctdb_public_ip_list *this_ip = parm; 
1190         struct ctdb_public_ip_list *prev_ip = data; 
1191
1192         if (prev_ip == NULL) {
1193                 return parm;
1194         }
1195         if (this_ip->pnn == -1) {
1196                 this_ip->pnn = prev_ip->pnn;
1197         }
1198
1199         return parm;
1200 }
1201
1202 void getips_count_callback(void *param, void *data)
1203 {
1204         struct ctdb_public_ip_list **ip_list = (struct ctdb_public_ip_list **)param;
1205         struct ctdb_public_ip_list *new_ip = (struct ctdb_public_ip_list *)data;
1206
1207         new_ip->next = *ip_list;
1208         *ip_list     = new_ip;
1209 }
1210
1211 static struct ctdb_public_ip_list *
1212 create_merged_ip_list(struct ctdb_context *ctdb)
1213 {
1214         int i, j;
1215         struct ctdb_public_ip_list *ip_list;
1216         struct ctdb_all_public_ips *public_ips;
1217
1218         if (ctdb->ip_tree != NULL) {
1219                 talloc_free(ctdb->ip_tree);
1220                 ctdb->ip_tree = NULL;
1221         }
1222         ctdb->ip_tree = trbt_create(ctdb, 0);
1223
1224         for (i=0;i<ctdb->num_nodes;i++) {
1225                 public_ips = ctdb->nodes[i]->known_public_ips;
1226
1227                 if (ctdb->nodes[i]->flags & NODE_FLAGS_DELETED) {
1228                         continue;
1229                 }
1230
1231                 /* there were no public ips for this node */
1232                 if (public_ips == NULL) {
1233                         continue;
1234                 }               
1235
1236                 for (j=0;j<public_ips->num;j++) {
1237                         struct ctdb_public_ip_list *tmp_ip; 
1238
1239                         tmp_ip = talloc_zero(ctdb->ip_tree, struct ctdb_public_ip_list);
1240                         CTDB_NO_MEMORY_NULL(ctdb, tmp_ip);
1241                         tmp_ip->pnn  = public_ips->ips[j].pnn;
1242                         tmp_ip->addr = public_ips->ips[j].addr;
1243                         tmp_ip->next = NULL;
1244
1245                         trbt_insertarray32_callback(ctdb->ip_tree,
1246                                 IP_KEYLEN, ip_key(&public_ips->ips[j].addr),
1247                                 add_ip_callback,
1248                                 tmp_ip);
1249                 }
1250         }
1251
1252         ip_list = NULL;
1253         trbt_traversearray32(ctdb->ip_tree, IP_KEYLEN, getips_count_callback, &ip_list);
1254
1255         return ip_list;
1256 }
1257
1258 /*
1259   make any IP alias changes for public addresses that are necessary 
1260  */
1261 int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap)
1262 {
1263   int i, num_healthy, retries, num_ips;
1264         struct ctdb_public_ip ip;
1265         struct ctdb_public_ipv4 ipv4;
1266         uint32_t mask, *nodes;
1267         struct ctdb_public_ip_list *all_ips, *tmp_ip;
1268         int maxnode, maxnum=0, minnode, minnum=0, num;
1269         TDB_DATA data;
1270         struct timeval timeout;
1271         struct client_async_data *async_data;
1272         struct ctdb_client_control_state *state;
1273         TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1274
1275         /*
1276          * ip failover is completely disabled, just send out the 
1277          * ipreallocated event.
1278          */
1279         if (ctdb->tunable.disable_ip_failover != 0) {
1280                 goto ipreallocated;
1281         }
1282
1283         ZERO_STRUCT(ip);
1284
1285         /* Count how many completely healthy nodes we have */
1286         num_healthy = 0;
1287         for (i=0;i<nodemap->num;i++) {
1288                 if (!(nodemap->nodes[i].flags & (NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED))) {
1289                         num_healthy++;
1290                 }
1291         }
1292
1293         if (num_healthy > 0) {
1294                 /* We have healthy nodes, so only consider them for 
1295                    serving public addresses
1296                 */
1297                 mask = NODE_FLAGS_INACTIVE|NODE_FLAGS_DISABLED;
1298         } else {
1299                 /* We didnt have any completely healthy nodes so
1300                    use "disabled" nodes as a fallback
1301                 */
1302                 mask = NODE_FLAGS_INACTIVE;
1303         }
1304
1305         /* since nodes only know about those public addresses that
1306            can be served by that particular node, no single node has
1307            a full list of all public addresses that exist in the cluster.
1308            Walk over all node structures and create a merged list of
1309            all public addresses that exist in the cluster.
1310
1311            keep the tree of ips around as ctdb->ip_tree
1312         */
1313         all_ips = create_merged_ip_list(ctdb);
1314
1315         /* Count how many ips we have */
1316         num_ips = 0;
1317         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1318                 num_ips++;
1319         }
1320
1321         /* If we want deterministic ip allocations, i.e. that the ip addresses
1322            will always be allocated the same way for a specific set of
1323            available/unavailable nodes.
1324         */
1325         if (1 == ctdb->tunable.deterministic_public_ips) {              
1326                 DEBUG(DEBUG_NOTICE,("Deterministic IPs enabled. Resetting all ip allocations\n"));
1327                 for (i=0,tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next,i++) {
1328                         tmp_ip->pnn = i%nodemap->num;
1329                 }
1330         }
1331
1332
1333         /* mark all public addresses with a masked node as being served by
1334            node -1
1335         */
1336         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1337                 if (tmp_ip->pnn == -1) {
1338                         continue;
1339                 }
1340                 if (nodemap->nodes[tmp_ip->pnn].flags & mask) {
1341                         tmp_ip->pnn = -1;
1342                 }
1343         }
1344
1345         /* verify that the assigned nodes can serve that public ip
1346            and set it to -1 if not
1347         */
1348         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1349                 if (tmp_ip->pnn == -1) {
1350                         continue;
1351                 }
1352                 if (can_node_serve_ip(ctdb, tmp_ip->pnn, tmp_ip) != 0) {
1353                         /* this node can not serve this ip. */
1354                         tmp_ip->pnn = -1;
1355                 }
1356         }
1357
1358
1359         /* now we must redistribute all public addresses with takeover node
1360            -1 among the nodes available
1361         */
1362         retries = 0;
1363 try_again:
1364         /* loop over all ip's and find a physical node to cover for 
1365            each unassigned ip.
1366         */
1367         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1368                 if (tmp_ip->pnn == -1) {
1369                         if (find_takeover_node(ctdb, nodemap, mask, tmp_ip, all_ips)) {
1370                                 DEBUG(DEBUG_WARNING,("Failed to find node to cover ip %s\n",
1371                                         ctdb_addr_to_str(&tmp_ip->addr)));
1372                         }
1373                 }
1374         }
1375
1376         /* If we dont want ips to fail back after a node becomes healthy
1377            again, we wont even try to reallocat the ip addresses so that
1378            they are evenly spread out.
1379            This can NOT be used at the same time as DeterministicIPs !
1380         */
1381         if (1 == ctdb->tunable.no_ip_failback) {
1382                 if (1 == ctdb->tunable.deterministic_public_ips) {
1383                         DEBUG(DEBUG_ERR, ("ERROR: You can not use 'DeterministicIPs' and 'NoIPFailback' at the same time\n"));
1384                 }
1385                 goto finished;
1386         }
1387
1388
1389         /* now, try to make sure the ip adresses are evenly distributed
1390            across the node.
1391            for each ip address, loop over all nodes that can serve this
1392            ip and make sure that the difference between the node
1393            serving the most and the node serving the least ip's are not greater
1394            than 1.
1395         */
1396         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1397                 if (tmp_ip->pnn == -1) {
1398                         continue;
1399                 }
1400
1401                 /* Get the highest and lowest number of ips's served by any 
1402                    valid node which can serve this ip.
1403                 */
1404                 maxnode = -1;
1405                 minnode = -1;
1406                 for (i=0;i<nodemap->num;i++) {
1407                         if (nodemap->nodes[i].flags & mask) {
1408                                 continue;
1409                         }
1410
1411                         /* only check nodes that can actually serve this ip */
1412                         if (can_node_serve_ip(ctdb, i, tmp_ip)) {
1413                                 /* no it couldnt   so skip to the next node */
1414                                 continue;
1415                         }
1416
1417                         num = node_ip_coverage(ctdb, i, all_ips);
1418                         if (maxnode == -1) {
1419                                 maxnode = i;
1420                                 maxnum  = num;
1421                         } else {
1422                                 if (num > maxnum) {
1423                                         maxnode = i;
1424                                         maxnum  = num;
1425                                 }
1426                         }
1427                         if (minnode == -1) {
1428                                 minnode = i;
1429                                 minnum  = num;
1430                         } else {
1431                                 if (num < minnum) {
1432                                         minnode = i;
1433                                         minnum  = num;
1434                                 }
1435                         }
1436                 }
1437                 if (maxnode == -1) {
1438                         DEBUG(DEBUG_WARNING,(__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
1439                                 ctdb_addr_to_str(&tmp_ip->addr)));
1440
1441                         continue;
1442                 }
1443
1444                 /* If we want deterministic IPs then dont try to reallocate 
1445                    them to spread out the load.
1446                 */
1447                 if (1 == ctdb->tunable.deterministic_public_ips) {
1448                         continue;
1449                 }
1450
1451                 /* if the spread between the smallest and largest coverage by
1452                    a node is >=2 we steal one of the ips from the node with
1453                    most coverage to even things out a bit.
1454                    try to do this a limited number of times since we dont
1455                    want to spend too much time balancing the ip coverage.
1456                 */
1457                 if ( (maxnum > minnum+1)
1458                   && (retries < (num_ips + 5)) ){
1459                         struct ctdb_public_ip_list *tmp;
1460
1461                         /* mark one of maxnode's vnn's as unassigned and try
1462                            again
1463                         */
1464                         for (tmp=all_ips;tmp;tmp=tmp->next) {
1465                                 if (tmp->pnn == maxnode) {
1466                                         tmp->pnn = -1;
1467                                         retries++;
1468                                         goto try_again;
1469                                 }
1470                         }
1471                 }
1472         }
1473
1474
1475         /* finished distributing the public addresses, now just send the 
1476            info out to the nodes
1477         */
1478 finished:
1479
1480         /* at this point ->pnn is the node which will own each IP
1481            or -1 if there is no node that can cover this ip
1482         */
1483
1484         /* now tell all nodes to delete any alias that they should not
1485            have.  This will be a NOOP on nodes that don't currently
1486            hold the given alias */
1487         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1488         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1489
1490         for (i=0;i<nodemap->num;i++) {
1491                 /* don't talk to unconnected nodes, but do talk to banned nodes */
1492                 if (nodemap->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
1493                         continue;
1494                 }
1495
1496                 for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1497                         if (tmp_ip->pnn == nodemap->nodes[i].pnn) {
1498                                 /* This node should be serving this
1499                                    vnn so dont tell it to release the ip
1500                                 */
1501                                 continue;
1502                         }
1503                         if (tmp_ip->addr.sa.sa_family == AF_INET) {
1504                                 ipv4.pnn = tmp_ip->pnn;
1505                                 ipv4.sin = tmp_ip->addr.ip;
1506
1507                                 timeout = TAKEOVER_TIMEOUT();
1508                                 data.dsize = sizeof(ipv4);
1509                                 data.dptr  = (uint8_t *)&ipv4;
1510                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1511                                                 0, CTDB_CONTROL_RELEASE_IPv4, 0,
1512                                                 data, async_data,
1513                                                 &timeout, NULL);
1514                         } else {
1515                                 ip.pnn  = tmp_ip->pnn;
1516                                 ip.addr = tmp_ip->addr;
1517
1518                                 timeout = TAKEOVER_TIMEOUT();
1519                                 data.dsize = sizeof(ip);
1520                                 data.dptr  = (uint8_t *)&ip;
1521                                 state = ctdb_control_send(ctdb, nodemap->nodes[i].pnn,
1522                                                 0, CTDB_CONTROL_RELEASE_IP, 0,
1523                                                 data, async_data,
1524                                                 &timeout, NULL);
1525                         }
1526
1527                         if (state == NULL) {
1528                                 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_RELEASE_IP to node %u\n", nodemap->nodes[i].pnn));
1529                                 talloc_free(tmp_ctx);
1530                                 return -1;
1531                         }
1532                 
1533                         ctdb_client_async_add(async_data, state);
1534                 }
1535         }
1536         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1537                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_RELEASE_IP failed\n"));
1538                 talloc_free(tmp_ctx);
1539                 return -1;
1540         }
1541         talloc_free(async_data);
1542
1543
1544         /* tell all nodes to get their own IPs */
1545         async_data = talloc_zero(tmp_ctx, struct client_async_data);
1546         CTDB_NO_MEMORY_FATAL(ctdb, async_data);
1547         for (tmp_ip=all_ips;tmp_ip;tmp_ip=tmp_ip->next) {
1548                 if (tmp_ip->pnn == -1) {
1549                         /* this IP won't be taken over */
1550                         continue;
1551                 }
1552
1553                 if (tmp_ip->addr.sa.sa_family == AF_INET) {
1554                         ipv4.pnn = tmp_ip->pnn;
1555                         ipv4.sin = tmp_ip->addr.ip;
1556
1557                         timeout = TAKEOVER_TIMEOUT();
1558                         data.dsize = sizeof(ipv4);
1559                         data.dptr  = (uint8_t *)&ipv4;
1560                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1561                                         0, CTDB_CONTROL_TAKEOVER_IPv4, 0,
1562                                         data, async_data,
1563                                         &timeout, NULL);
1564                 } else {
1565                         ip.pnn  = tmp_ip->pnn;
1566                         ip.addr = tmp_ip->addr;
1567
1568                         timeout = TAKEOVER_TIMEOUT();
1569                         data.dsize = sizeof(ip);
1570                         data.dptr  = (uint8_t *)&ip;
1571                         state = ctdb_control_send(ctdb, tmp_ip->pnn,
1572                                         0, CTDB_CONTROL_TAKEOVER_IP, 0,
1573                                         data, async_data,
1574                                         &timeout, NULL);
1575                 }
1576                 if (state == NULL) {
1577                         DEBUG(DEBUG_ERR,(__location__ " Failed to call async control CTDB_CONTROL_TAKEOVER_IP to node %u\n", tmp_ip->pnn));
1578                         talloc_free(tmp_ctx);
1579                         return -1;
1580                 }
1581                 
1582                 ctdb_client_async_add(async_data, state);
1583         }
1584         if (ctdb_client_async_wait(ctdb, async_data) != 0) {
1585                 DEBUG(DEBUG_ERR,(__location__ " Async control CTDB_CONTROL_TAKEOVER_IP failed\n"));
1586                 talloc_free(tmp_ctx);
1587                 return -1;
1588         }
1589
1590 ipreallocated:
1591         /* tell all nodes to update natwg */
1592         /* send the flags update natgw on all connected nodes */
1593         data.dptr  = discard_const("ipreallocated");
1594         data.dsize = strlen((char *)data.dptr) + 1; 
1595         nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
1596         if (ctdb_client_async_control(ctdb, CTDB_CONTROL_RUN_EVENTSCRIPTS,
1597                                       nodes, 0, TAKEOVER_TIMEOUT(),
1598                                       false, data,
1599                                       NULL, NULL,
1600                                       NULL) != 0) {
1601                 DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n"));
1602         }
1603
1604         talloc_free(tmp_ctx);
1605         return 0;
1606 }
1607
1608
1609 /*
1610   destroy a ctdb_client_ip structure
1611  */
1612 static int ctdb_client_ip_destructor(struct ctdb_client_ip *ip)
1613 {
1614         DEBUG(DEBUG_DEBUG,("destroying client tcp for %s:%u (client_id %u)\n",
1615                 ctdb_addr_to_str(&ip->addr),
1616                 ntohs(ip->addr.ip.sin_port),
1617                 ip->client_id));
1618
1619         DLIST_REMOVE(ip->ctdb->client_ip_list, ip);
1620         return 0;
1621 }
1622
1623 /*
1624   called by a client to inform us of a TCP connection that it is managing
1625   that should tickled with an ACK when IP takeover is done
1626   we handle both the old ipv4 style of packets as well as the new ipv4/6
1627   pdus.
1628  */
1629 int32_t ctdb_control_tcp_client(struct ctdb_context *ctdb, uint32_t client_id,
1630                                 TDB_DATA indata)
1631 {
1632         struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
1633         struct ctdb_control_tcp *old_addr = NULL;
1634         struct ctdb_control_tcp_addr new_addr;
1635         struct ctdb_control_tcp_addr *tcp_sock = NULL;
1636         struct ctdb_tcp_list *tcp;
1637         struct ctdb_tcp_connection t;
1638         int ret;
1639         TDB_DATA data;
1640         struct ctdb_client_ip *ip;
1641         struct ctdb_vnn *vnn;
1642         ctdb_sock_addr addr;
1643
1644         switch (indata.dsize) {
1645         case sizeof(struct ctdb_control_tcp):
1646                 old_addr = (struct ctdb_control_tcp *)indata.dptr;
1647                 ZERO_STRUCT(new_addr);
1648                 tcp_sock = &new_addr;
1649                 tcp_sock->src.ip  = old_addr->src;
1650                 tcp_sock->dest.ip = old_addr->dest;
1651                 break;
1652         case sizeof(struct ctdb_control_tcp_addr):
1653                 tcp_sock = (struct ctdb_control_tcp_addr *)indata.dptr;
1654                 break;
1655         default:
1656                 DEBUG(DEBUG_ERR,(__location__ " Invalid data structure passed "
1657                                  "to ctdb_control_tcp_client. size was %d but "
1658                                  "only allowed sizes are %lu and %lu\n",
1659                                  (int)indata.dsize,
1660                                  (long unsigned)sizeof(struct ctdb_control_tcp),
1661                                  (long unsigned)sizeof(struct ctdb_control_tcp_addr)));
1662                 return -1;
1663         }
1664
1665         addr = tcp_sock->src;
1666         ctdb_canonicalize_ip(&addr,  &tcp_sock->src);
1667         addr = tcp_sock->dest;
1668         ctdb_canonicalize_ip(&addr, &tcp_sock->dest);
1669
1670         ZERO_STRUCT(addr);
1671         memcpy(&addr, &tcp_sock->dest, sizeof(addr));
1672         vnn = find_public_ip_vnn(ctdb, &addr);
1673         if (vnn == NULL) {
1674                 switch (addr.sa.sa_family) {
1675                 case AF_INET:
1676                         if (ntohl(addr.ip.sin_addr.s_addr) != INADDR_LOOPBACK) {
1677                                 DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public address.\n", 
1678                                         ctdb_addr_to_str(&addr)));
1679                         }
1680                         break;
1681                 case AF_INET6:
1682                         DEBUG(DEBUG_ERR,("Could not add client IP %s. This is not a public ipv6 address.\n", 
1683                                 ctdb_addr_to_str(&addr)));
1684                         break;
1685                 default:
1686                         DEBUG(DEBUG_ERR,(__location__ " Unknown family type %d\n", addr.sa.sa_family));
1687                 }
1688
1689                 return 0;
1690         }
1691
1692         if (vnn->pnn != ctdb->pnn) {
1693                 DEBUG(DEBUG_ERR,("Attempt to register tcp client for IP %s we don't hold - failing (client_id %u pid %u)\n",
1694                         ctdb_addr_to_str(&addr),
1695                         client_id, client->pid));
1696                 /* failing this call will tell smbd to die */
1697                 return -1;
1698         }
1699
1700         ip = talloc(client, struct ctdb_client_ip);
1701         CTDB_NO_MEMORY(ctdb, ip);
1702
1703         ip->ctdb      = ctdb;
1704         ip->addr      = addr;
1705         ip->client_id = client_id;
1706         talloc_set_destructor(ip, ctdb_client_ip_destructor);
1707         DLIST_ADD(ctdb->client_ip_list, ip);
1708
1709         tcp = talloc(client, struct ctdb_tcp_list);
1710         CTDB_NO_MEMORY(ctdb, tcp);
1711
1712         tcp->connection.src_addr = tcp_sock->src;
1713         tcp->connection.dst_addr = tcp_sock->dest;
1714
1715         DLIST_ADD(client->tcp_list, tcp);
1716
1717         t.src_addr = tcp_sock->src;
1718         t.dst_addr = tcp_sock->dest;
1719
1720         data.dptr = (uint8_t *)&t;
1721         data.dsize = sizeof(t);
1722
1723         switch (addr.sa.sa_family) {
1724         case AF_INET:
1725                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1726                         (unsigned)ntohs(tcp_sock->dest.ip.sin_port), 
1727                         ctdb_addr_to_str(&tcp_sock->src),
1728                         (unsigned)ntohs(tcp_sock->src.ip.sin_port), client_id, client->pid));
1729                 break;
1730         case AF_INET6:
1731                 DEBUG(DEBUG_INFO,("registered tcp client for %u->%s:%u (client_id %u pid %u)\n",
1732                         (unsigned)ntohs(tcp_sock->dest.ip6.sin6_port), 
1733                         ctdb_addr_to_str(&tcp_sock->src),
1734                         (unsigned)ntohs(tcp_sock->src.ip6.sin6_port), client_id, client->pid));
1735                 break;
1736         default:
1737                 DEBUG(DEBUG_ERR,(__location__ " Unknown family %d\n", addr.sa.sa_family));
1738         }
1739
1740
1741         /* tell all nodes about this tcp connection */
1742         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
1743                                        CTDB_CONTROL_TCP_ADD,
1744                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
1745         if (ret != 0) {
1746                 DEBUG(DEBUG_ERR,(__location__ " Failed to send CTDB_CONTROL_TCP_ADD\n"));
1747                 return -1;
1748         }
1749
1750         return 0;
1751 }
1752
1753 /*
1754   find a tcp address on a list
1755  */
1756 static struct ctdb_tcp_connection *ctdb_tcp_find(struct ctdb_tcp_array *array, 
1757                                            struct ctdb_tcp_connection *tcp)
1758 {
1759         int i;
1760
1761         if (array == NULL) {
1762                 return NULL;
1763         }
1764
1765         for (i=0;i<array->num;i++) {
1766                 if (ctdb_same_sockaddr(&array->connections[i].src_addr, &tcp->src_addr) &&
1767                     ctdb_same_sockaddr(&array->connections[i].dst_addr, &tcp->dst_addr)) {
1768                         return &array->connections[i];
1769                 }
1770         }
1771         return NULL;
1772 }
1773
1774
1775
1776 /*
1777   called by a daemon to inform us of a TCP connection that one of its
1778   clients managing that should tickled with an ACK when IP takeover is
1779   done
1780  */
1781 int32_t ctdb_control_tcp_add(struct ctdb_context *ctdb, TDB_DATA indata, bool tcp_update_needed)
1782 {
1783         struct ctdb_tcp_connection *p = (struct ctdb_tcp_connection *)indata.dptr;
1784         struct ctdb_tcp_array *tcparray;
1785         struct ctdb_tcp_connection tcp;
1786         struct ctdb_vnn *vnn;
1787
1788         vnn = find_public_ip_vnn(ctdb, &p->dst_addr);
1789         if (vnn == NULL) {
1790                 DEBUG(DEBUG_INFO,(__location__ " got TCP_ADD control for an address which is not a public address '%s'\n",
1791                         ctdb_addr_to_str(&p->dst_addr)));
1792
1793                 return -1;
1794         }
1795
1796
1797         tcparray = vnn->tcp_array;
1798
1799         /* If this is the first tickle */
1800         if (tcparray == NULL) {
1801                 tcparray = talloc_size(ctdb->nodes, 
1802                         offsetof(struct ctdb_tcp_array, connections) +
1803                         sizeof(struct ctdb_tcp_connection) * 1);
1804                 CTDB_NO_MEMORY(ctdb, tcparray);
1805                 vnn->tcp_array = tcparray;
1806
1807                 tcparray->num = 0;
1808                 tcparray->connections = talloc_size(tcparray, sizeof(struct ctdb_tcp_connection));
1809                 CTDB_NO_MEMORY(ctdb, tcparray->connections);
1810
1811                 tcparray->connections[tcparray->num].src_addr = p->src_addr;
1812                 tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1813                 tcparray->num++;
1814
1815                 if (tcp_update_needed) {
1816                         vnn->tcp_update_needed = true;
1817                 }
1818                 return 0;
1819         }
1820
1821
1822         /* Do we already have this tickle ?*/
1823         tcp.src_addr = p->src_addr;
1824         tcp.dst_addr = p->dst_addr;
1825         if (ctdb_tcp_find(vnn->tcp_array, &tcp) != NULL) {
1826                 DEBUG(DEBUG_DEBUG,("Already had tickle info for %s:%u for vnn:%u\n",
1827                         ctdb_addr_to_str(&tcp.dst_addr),
1828                         ntohs(tcp.dst_addr.ip.sin_port),
1829                         vnn->pnn));
1830                 return 0;
1831         }
1832
1833         /* A new tickle, we must add it to the array */
1834         tcparray->connections = talloc_realloc(tcparray, tcparray->connections,
1835                                         struct ctdb_tcp_connection,
1836                                         tcparray->num+1);
1837         CTDB_NO_MEMORY(ctdb, tcparray->connections);
1838
1839         vnn->tcp_array = tcparray;
1840         tcparray->connections[tcparray->num].src_addr = p->src_addr;
1841         tcparray->connections[tcparray->num].dst_addr = p->dst_addr;
1842         tcparray->num++;
1843                                 
1844         DEBUG(DEBUG_INFO,("Added tickle info for %s:%u from vnn %u\n",
1845                 ctdb_addr_to_str(&tcp.dst_addr),
1846                 ntohs(tcp.dst_addr.ip.sin_port),
1847                 vnn->pnn));
1848
1849         if (tcp_update_needed) {
1850                 vnn->tcp_update_needed = true;
1851         }
1852
1853         return 0;
1854 }
1855
1856
1857 /*
1858   called by a daemon to inform us of a TCP connection that one of its
1859   clients managing that should tickled with an ACK when IP takeover is
1860   done
1861  */
1862 static void ctdb_remove_tcp_connection(struct ctdb_context *ctdb, struct ctdb_tcp_connection *conn)
1863 {
1864         struct ctdb_tcp_connection *tcpp;
1865         struct ctdb_vnn *vnn = find_public_ip_vnn(ctdb, &conn->dst_addr);
1866
1867         if (vnn == NULL) {
1868                 DEBUG(DEBUG_ERR,(__location__ " unable to find public address %s\n",
1869                         ctdb_addr_to_str(&conn->dst_addr)));
1870                 return;
1871         }
1872
1873         /* if the array is empty we cant remove it
1874            and we dont need to do anything
1875          */
1876         if (vnn->tcp_array == NULL) {
1877                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist (array is empty) %s:%u\n",
1878                         ctdb_addr_to_str(&conn->dst_addr),
1879                         ntohs(conn->dst_addr.ip.sin_port)));
1880                 return;
1881         }
1882
1883
1884         /* See if we know this connection
1885            if we dont know this connection  then we dont need to do anything
1886          */
1887         tcpp = ctdb_tcp_find(vnn->tcp_array, conn);
1888         if (tcpp == NULL) {
1889                 DEBUG(DEBUG_INFO,("Trying to remove tickle that doesnt exist %s:%u\n",
1890                         ctdb_addr_to_str(&conn->dst_addr),
1891                         ntohs(conn->dst_addr.ip.sin_port)));
1892                 return;
1893         }
1894
1895
1896         /* We need to remove this entry from the array.
1897            Instead of allocating a new array and copying data to it
1898            we cheat and just copy the last entry in the existing array
1899            to the entry that is to be removed and just shring the 
1900            ->num field
1901          */
1902         *tcpp = vnn->tcp_array->connections[vnn->tcp_array->num - 1];
1903         vnn->tcp_array->num--;
1904
1905         /* If we deleted the last entry we also need to remove the entire array
1906          */
1907         if (vnn->tcp_array->num == 0) {
1908                 talloc_free(vnn->tcp_array);
1909                 vnn->tcp_array = NULL;
1910         }               
1911
1912         vnn->tcp_update_needed = true;
1913
1914         DEBUG(DEBUG_INFO,("Removed tickle info for %s:%u\n",
1915                 ctdb_addr_to_str(&conn->src_addr),
1916                 ntohs(conn->src_addr.ip.sin_port)));
1917 }
1918
1919
1920 /*
1921   called by a daemon to inform us of a TCP connection that one of its
1922   clients used are no longer needed in the tickle database
1923  */
1924 int32_t ctdb_control_tcp_remove(struct ctdb_context *ctdb, TDB_DATA indata)
1925 {
1926         struct ctdb_tcp_connection *conn = (struct ctdb_tcp_connection *)indata.dptr;
1927
1928         ctdb_remove_tcp_connection(ctdb, conn);
1929
1930         return 0;
1931 }
1932
1933
/*
  called when a daemon restarts. the intent is to send all tickles for
  all public addresses we are serving to the new node immediately, but
  that is not implemented yet: the call currently does nothing and
  always returns 0.
 */
int32_t ctdb_control_startup(struct ctdb_context *ctdb, uint32_t vnn)
{
        /* XXX here we should send all tickles we are serving to the new node */
        (void)ctdb;
        (void)vnn;

        return 0;
}
1943
1944
1945 /*
1946   called when a client structure goes away - hook to remove
1947   elements from the tcp_list in all daemons
1948  */
1949 void ctdb_takeover_client_destructor_hook(struct ctdb_client *client)
1950 {
1951         while (client->tcp_list) {
1952                 struct ctdb_tcp_list *tcp = client->tcp_list;
1953                 DLIST_REMOVE(client->tcp_list, tcp);
1954                 ctdb_remove_tcp_connection(client->ctdb, &tcp->connection);
1955         }
1956 }
1957
1958
1959 /*
1960   release all IPs on shutdown
1961  */
1962 void ctdb_release_all_ips(struct ctdb_context *ctdb)
1963 {
1964         struct ctdb_vnn *vnn;
1965
1966         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
1967                 if (!ctdb_sys_have_ip(&vnn->public_address)) {
1968                         ctdb_vnn_unassign_iface(ctdb, vnn);
1969                         continue;
1970                 }
1971                 if (!vnn->iface) {
1972                         continue;
1973                 }
1974                 ctdb_event_script_args(ctdb, CTDB_EVENT_RELEASE_IP, "%s %s %u",
1975                                   ctdb_vnn_iface_string(vnn),
1976                                   ctdb_addr_to_str(&vnn->public_address),
1977                                   vnn->public_netmask_bits);
1978                 release_kill_clients(ctdb, &vnn->public_address);
1979                 ctdb_vnn_unassign_iface(ctdb, vnn);
1980         }
1981 }
1982
1983
1984 /*
1985   get list of public IPs
1986  */
1987 int32_t ctdb_control_get_public_ips(struct ctdb_context *ctdb, 
1988                                     struct ctdb_req_control *c, TDB_DATA *outdata)
1989 {
1990         int i, num, len;
1991         struct ctdb_all_public_ips *ips;
1992         struct ctdb_vnn *vnn;
1993         bool only_available = false;
1994
1995         if (c->flags & CTDB_PUBLIC_IP_FLAGS_ONLY_AVAILABLE) {
1996                 only_available = true;
1997         }
1998
1999         /* count how many public ip structures we have */
2000         num = 0;
2001         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2002                 num++;
2003         }
2004
2005         len = offsetof(struct ctdb_all_public_ips, ips) + 
2006                 num*sizeof(struct ctdb_public_ip);
2007         ips = talloc_zero_size(outdata, len);
2008         CTDB_NO_MEMORY(ctdb, ips);
2009
2010         i = 0;
2011         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2012                 if (only_available && !ctdb_vnn_available(ctdb, vnn)) {
2013                         continue;
2014                 }
2015                 ips->ips[i].pnn  = vnn->pnn;
2016                 ips->ips[i].addr = vnn->public_address;
2017                 i++;
2018         }
2019         ips->num = i;
2020         len = offsetof(struct ctdb_all_public_ips, ips) +
2021                 i*sizeof(struct ctdb_public_ip);
2022
2023         outdata->dsize = len;
2024         outdata->dptr  = (uint8_t *)ips;
2025
2026         return 0;
2027 }
2028
2029
2030 /*
2031   get list of public IPs, old ipv4 style.  only returns ipv4 addresses
2032  */
2033 int32_t ctdb_control_get_public_ipsv4(struct ctdb_context *ctdb, 
2034                                     struct ctdb_req_control *c, TDB_DATA *outdata)
2035 {
2036         int i, num, len;
2037         struct ctdb_all_public_ipsv4 *ips;
2038         struct ctdb_vnn *vnn;
2039
2040         /* count how many public ip structures we have */
2041         num = 0;
2042         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2043                 if (vnn->public_address.sa.sa_family != AF_INET) {
2044                         continue;
2045                 }
2046                 num++;
2047         }
2048
2049         len = offsetof(struct ctdb_all_public_ipsv4, ips) + 
2050                 num*sizeof(struct ctdb_public_ipv4);
2051         ips = talloc_zero_size(outdata, len);
2052         CTDB_NO_MEMORY(ctdb, ips);
2053
2054         outdata->dsize = len;
2055         outdata->dptr  = (uint8_t *)ips;
2056
2057         ips->num = num;
2058         i = 0;
2059         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2060                 if (vnn->public_address.sa.sa_family != AF_INET) {
2061                         continue;
2062                 }
2063                 ips->ips[i].pnn = vnn->pnn;
2064                 ips->ips[i].sin = vnn->public_address.ip;
2065                 i++;
2066         }
2067
2068         return 0;
2069 }
2070
2071 int32_t ctdb_control_get_public_ip_info(struct ctdb_context *ctdb,
2072                                         struct ctdb_req_control *c,
2073                                         TDB_DATA indata,
2074                                         TDB_DATA *outdata)
2075 {
2076         int i, num, len;
2077         ctdb_sock_addr *addr;
2078         struct ctdb_control_public_ip_info *info;
2079         struct ctdb_vnn *vnn;
2080
2081         addr = (ctdb_sock_addr *)indata.dptr;
2082
2083         vnn = find_public_ip_vnn(ctdb, addr);
2084         if (vnn == NULL) {
2085                 /* if it is not a public ip   it could be our 'single ip' */
2086                 if (ctdb->single_ip_vnn) {
2087                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, addr)) {
2088                                 vnn = ctdb->single_ip_vnn;
2089                         }
2090                 }
2091         }
2092         if (vnn == NULL) {
2093                 DEBUG(DEBUG_ERR,(__location__ " Could not get public ip info, "
2094                                  "'%s'not a public address\n",
2095                                  ctdb_addr_to_str(addr)));
2096                 return -1;
2097         }
2098
2099         /* count how many public ip structures we have */
2100         num = 0;
2101         for (;vnn->ifaces[num];) {
2102                 num++;
2103         }
2104
2105         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2106                 num*sizeof(struct ctdb_control_iface_info);
2107         info = talloc_zero_size(outdata, len);
2108         CTDB_NO_MEMORY(ctdb, info);
2109
2110         info->ip.addr = vnn->public_address;
2111         info->ip.pnn = vnn->pnn;
2112         info->active_idx = 0xFFFFFFFF;
2113
2114         for (i=0; vnn->ifaces[i]; i++) {
2115                 struct ctdb_iface *cur;
2116
2117                 cur = ctdb_find_iface(ctdb, vnn->ifaces[i]);
2118                 if (cur == NULL) {
2119                         DEBUG(DEBUG_CRIT, (__location__ " internal error iface[%s] unknown\n",
2120                                            vnn->ifaces[i]));
2121                         return -1;
2122                 }
2123                 if (vnn->iface == cur) {
2124                         info->active_idx = i;
2125                 }
2126                 strcpy(info->ifaces[i].name, cur->name);
2127                 info->ifaces[i].link_state = cur->link_up;
2128                 info->ifaces[i].references = cur->references;
2129         }
2130         info->num = i;
2131         len = offsetof(struct ctdb_control_public_ip_info, ifaces) +
2132                 i*sizeof(struct ctdb_control_iface_info);
2133
2134         outdata->dsize = len;
2135         outdata->dptr  = (uint8_t *)info;
2136
2137         return 0;
2138 }
2139
2140 int32_t ctdb_control_get_ifaces(struct ctdb_context *ctdb,
2141                                 struct ctdb_req_control *c,
2142                                 TDB_DATA *outdata)
2143 {
2144         int i, num, len;
2145         struct ctdb_control_get_ifaces *ifaces;
2146         struct ctdb_iface *cur;
2147
2148         /* count how many public ip structures we have */
2149         num = 0;
2150         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2151                 num++;
2152         }
2153
2154         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2155                 num*sizeof(struct ctdb_control_iface_info);
2156         ifaces = talloc_zero_size(outdata, len);
2157         CTDB_NO_MEMORY(ctdb, ifaces);
2158
2159         i = 0;
2160         for (cur=ctdb->ifaces;cur;cur=cur->next) {
2161                 strcpy(ifaces->ifaces[i].name, cur->name);
2162                 ifaces->ifaces[i].link_state = cur->link_up;
2163                 ifaces->ifaces[i].references = cur->references;
2164                 i++;
2165         }
2166         ifaces->num = i;
2167         len = offsetof(struct ctdb_control_get_ifaces, ifaces) +
2168                 i*sizeof(struct ctdb_control_iface_info);
2169
2170         outdata->dsize = len;
2171         outdata->dptr  = (uint8_t *)ifaces;
2172
2173         return 0;
2174 }
2175
2176 int32_t ctdb_control_set_iface_link(struct ctdb_context *ctdb,
2177                                     struct ctdb_req_control *c,
2178                                     TDB_DATA indata)
2179 {
2180         struct ctdb_control_iface_info *info;
2181         struct ctdb_iface *iface;
2182         bool link_up = false;
2183
2184         info = (struct ctdb_control_iface_info *)indata.dptr;
2185
2186         if (info->name[CTDB_IFACE_SIZE] != '\0') {
2187                 int len = strnlen(info->name, CTDB_IFACE_SIZE);
2188                 DEBUG(DEBUG_ERR, (__location__ " name[%*.*s] not terminated\n",
2189                                   len, len, info->name));
2190                 return -1;
2191         }
2192
2193         switch (info->link_state) {
2194         case 0:
2195                 link_up = false;
2196                 break;
2197         case 1:
2198                 link_up = true;
2199                 break;
2200         default:
2201                 DEBUG(DEBUG_ERR, (__location__ " link_state[%u] invalid\n",
2202                                   (unsigned int)info->link_state));
2203                 return -1;
2204         }
2205
2206         if (info->references != 0) {
2207                 DEBUG(DEBUG_ERR, (__location__ " references[%u] should be 0\n",
2208                                   (unsigned int)info->references));
2209                 return -1;
2210         }
2211
2212         iface = ctdb_find_iface(ctdb, info->name);
2213         if (iface == NULL) {
2214                 DEBUG(DEBUG_ERR, (__location__ "iface[%s] is unknown\n",
2215                                   info->name));
2216                 return -1;
2217         }
2218
2219         if (link_up == iface->link_up) {
2220                 return 0;
2221         }
2222
2223         DEBUG(iface->link_up?DEBUG_ERR:DEBUG_NOTICE,
2224               ("iface[%s] has changed it's link status %s => %s\n",
2225                iface->name,
2226                iface->link_up?"up":"down",
2227                link_up?"up":"down"));
2228
2229         iface->link_up = link_up;
2230         return 0;
2231 }
2232
2233
2234 /* 
2235    structure containing the capture socket and the set of tcp connections
2236    that the ctdb daemon is to kill for one public address (vnn)
2237 */
2238 struct ctdb_kill_tcp {
2239         struct ctdb_vnn *vnn; /* owning vnn; its ->killtcp is cleared by ctdb_killtcp_destructor */
2240         struct ctdb_context *ctdb; /* daemon context; its event context is used to re-arm the tickle timer */
2241         int capture_fd; /* fd capture_tcp_handler reads tcp packets from; -1 when the structure is created */
2242         struct fd_event *fde; /* presumably the fd event watching capture_fd - TODO confirm, not set in this part of the file */
2243         trbt_tree_t *connections; /* tree of struct ctdb_killtcp_con, looked up with killtcp_key() */
2244         void *private_data; /* opaque state handed to ctdb_sys_read_tcp_packet */
2245 };
2246
2247 /*
2248   a tcp connection that is to be killed; the src/dst socket pair
2249   identifies it in the killtcp tree */
2250 struct ctdb_killtcp_con {
2251         ctdb_sock_addr src_addr; /* one end of the connection */
2252         ctdb_sock_addr dst_addr; /* the other end of the connection */
2253         int count; /* tickles sent so far; tickle_connection_traverse gives up once this reaches 5 */
2254         struct ctdb_kill_tcp *killtcp; /* presumably the ctdb_kill_tcp this entry belongs to - TODO confirm */
2255 };
2256
2257 /* this function is used to create a key to represent this socketpair
2258    in the killtcp tree.
2259    this key is used to insert and lookup matching socketpairs that are
2260    to be tickled and RST
2261 */
2262 #define KILLTCP_KEYLEN  10
2263 static uint32_t *killtcp_key(ctdb_sock_addr *src, ctdb_sock_addr *dst)
2264 {
2265         static uint32_t key[KILLTCP_KEYLEN];
2266
2267         bzero(key, sizeof(key));
2268
2269         if (src->sa.sa_family != dst->sa.sa_family) {
2270                 DEBUG(DEBUG_ERR, (__location__ " ERROR, different families passed :%u vs %u\n", src->sa.sa_family, dst->sa.sa_family));
2271                 return key;
2272         }
2273         
2274         switch (src->sa.sa_family) {
2275         case AF_INET:
2276                 key[0]  = dst->ip.sin_addr.s_addr;
2277                 key[1]  = src->ip.sin_addr.s_addr;
2278                 key[2]  = dst->ip.sin_port;
2279                 key[3]  = src->ip.sin_port;
2280                 break;
2281         case AF_INET6:
2282                 key[0]  = dst->ip6.sin6_addr.s6_addr32[3];
2283                 key[1]  = src->ip6.sin6_addr.s6_addr32[3];
2284                 key[2]  = dst->ip6.sin6_addr.s6_addr32[2];
2285                 key[3]  = src->ip6.sin6_addr.s6_addr32[2];
2286                 key[4]  = dst->ip6.sin6_addr.s6_addr32[1];
2287                 key[5]  = src->ip6.sin6_addr.s6_addr32[1];
2288                 key[6]  = dst->ip6.sin6_addr.s6_addr32[0];
2289                 key[7]  = src->ip6.sin6_addr.s6_addr32[0];
2290                 key[8]  = dst->ip6.sin6_port;
2291                 key[9]  = src->ip6.sin6_port;
2292                 break;
2293         default:
2294                 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", src->sa.sa_family));
2295                 return key;
2296         }
2297
2298         return key;
2299 }
2300
2301 /*
2302   called when we get a read event on the raw socket
2303  */
2304 static void capture_tcp_handler(struct event_context *ev, struct fd_event *fde, 
2305                                 uint16_t flags, void *private_data)
2306 {
2307         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2308         struct ctdb_killtcp_con *con;
2309         ctdb_sock_addr src, dst;
2310         uint32_t ack_seq, seq;
2311
2312         if (!(flags & EVENT_FD_READ)) {
2313                 return;
2314         }
2315
2316         if (ctdb_sys_read_tcp_packet(killtcp->capture_fd,
2317                                 killtcp->private_data,
2318                                 &src, &dst,
2319                                 &ack_seq, &seq) != 0) {
2320                 /* probably a non-tcp ACK packet */
2321                 return;
2322         }
2323
2324         /* check if we have this guy in our list of connections
2325            to kill
2326         */
2327         con = trbt_lookuparray32(killtcp->connections, 
2328                         KILLTCP_KEYLEN, killtcp_key(&src, &dst));
2329         if (con == NULL) {
2330                 /* no this was some other packet we can just ignore */
2331                 return;
2332         }
2333
2334         /* This one has been tickled !
2335            now reset him and remove him from the list.
2336          */
2337         DEBUG(DEBUG_INFO, ("sending a tcp reset to kill connection :%d -> %s:%d\n",
2338                 ntohs(con->dst_addr.ip.sin_port),
2339                 ctdb_addr_to_str(&con->src_addr),
2340                 ntohs(con->src_addr.ip.sin_port)));
2341
2342         ctdb_sys_send_tcp(&con->dst_addr, &con->src_addr, ack_seq, seq, 1);
2343         talloc_free(con);
2344 }
2345
2346
2347 /* when traversing the list of all tcp connections to send tickle acks to
2348    (so that we can capture the ack coming back and kill the connection
2349     by a RST)
2350    this callback is called for each connection we are currently trying to kill
2351 */
2352 static void tickle_connection_traverse(void *param, void *data)
2353 {
2354         struct ctdb_killtcp_con *con = talloc_get_type(data, struct ctdb_killtcp_con);
2355
2356         /* have tried too many times, just give up */
2357         if (con->count >= 5) {
2358                 /* can't delete in traverse: reparent to delete_cons */
2359                 talloc_steal(param, con);
2360                 return;
2361         }
2362
2363         /* othervise, try tickling it again */
2364         con->count++;
2365         ctdb_sys_send_tcp(
2366                 (ctdb_sock_addr *)&con->dst_addr,
2367                 (ctdb_sock_addr *)&con->src_addr,
2368                 0, 0, 0);
2369 }
2370
2371
2372 /* 
2373    called every second until all sentenced connections have been reset
2374  */
2375 static void ctdb_tickle_sentenced_connections(struct event_context *ev, struct timed_event *te, 
2376                                               struct timeval t, void *private_data)
2377 {
2378         struct ctdb_kill_tcp *killtcp = talloc_get_type(private_data, struct ctdb_kill_tcp);
2379         void *delete_cons = talloc_new(NULL);
2380
2381         /* loop over all connections sending tickle ACKs */
2382         trbt_traversearray32(killtcp->connections, KILLTCP_KEYLEN, tickle_connection_traverse, delete_cons);
2383
2384         /* now we've finished traverse, it's safe to do deletion. */
2385         talloc_free(delete_cons);
2386
2387         /* If there are no more connections to kill we can remove the
2388            entire killtcp structure
2389          */
2390         if ( (killtcp->connections == NULL) || 
2391              (killtcp->connections->root == NULL) ) {
2392                 talloc_free(killtcp);
2393                 return;
2394         }
2395
2396         /* try tickling them again in a seconds time
2397          */
2398         event_add_timed(killtcp->ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2399                         ctdb_tickle_sentenced_connections, killtcp);
2400 }
2401
2402 /*
2403   destroy the killtcp structure
2404  */
2405 static int ctdb_killtcp_destructor(struct ctdb_kill_tcp *killtcp)
2406 {
2407         if (killtcp->vnn) {
2408                 killtcp->vnn->killtcp = NULL;
2409         }
2410         return 0;
2411 }
2412
2413
/* insert callback for the killtcp tree: unconditionally replace any
   existing connection structure with the new one.

   parm is the new entry and data the one already stored (if any); the
   new entry is what gets returned. the old one is deliberately not
   freed here: it is talloc_stolen by the same node in the tree anyway
   and will be deleted when the new data is deleted.
*/
static void *add_killtcp_callback(void *parm, void *data)
{
        void *replacement = parm;

        (void)data;

        return replacement;
}
2425
2426 /*
2427   add a tcp socket to the list of connections we want to RST
2428  */
2429 static int ctdb_killtcp_add_connection(struct ctdb_context *ctdb, 
2430                                        ctdb_sock_addr *s,
2431                                        ctdb_sock_addr *d)
2432 {
2433         ctdb_sock_addr src, dst;
2434         struct ctdb_kill_tcp *killtcp;
2435         struct ctdb_killtcp_con *con;
2436         struct ctdb_vnn *vnn;
2437
2438         ctdb_canonicalize_ip(s, &src);
2439         ctdb_canonicalize_ip(d, &dst);
2440
2441         vnn = find_public_ip_vnn(ctdb, &dst);
2442         if (vnn == NULL) {
2443                 vnn = find_public_ip_vnn(ctdb, &src);
2444         }
2445         if (vnn == NULL) {
2446                 /* if it is not a public ip   it could be our 'single ip' */
2447                 if (ctdb->single_ip_vnn) {
2448                         if (ctdb_same_ip(&ctdb->single_ip_vnn->public_address, &dst)) {
2449                                 vnn = ctdb->single_ip_vnn;
2450                         }
2451                 }
2452         }
2453         if (vnn == NULL) {
2454                 DEBUG(DEBUG_ERR,(__location__ " Could not killtcp, not a public address\n")); 
2455                 return -1;
2456         }
2457
2458         killtcp = vnn->killtcp;
2459         
2460         /* If this is the first connection to kill we must allocate
2461            a new structure
2462          */
2463         if (killtcp == NULL) {
2464                 killtcp = talloc_zero(ctdb, struct ctdb_kill_tcp);
2465                 CTDB_NO_MEMORY(ctdb, killtcp);
2466
2467                 killtcp->vnn         = vnn;
2468                 killtcp->ctdb        = ctdb;
2469                 killtcp->capture_fd  = -1;
2470                 killtcp->connections = trbt_create(killtcp, 0);
2471
2472                 vnn->killtcp         = killtcp;
2473                 talloc_set_destructor(killtcp, ctdb_killtcp_destructor);
2474         }
2475
2476
2477
2478         /* create a structure that describes this connection we want to
2479            RST and store it in killtcp->connections
2480         */
2481         con = talloc(killtcp, struct ctdb_killtcp_con);
2482         CTDB_NO_MEMORY(ctdb, con);
2483         con->src_addr = src;
2484         con->dst_addr = dst;
2485         con->count    = 0;
2486         con->killtcp  = killtcp;
2487
2488
2489         trbt_insertarray32_callback(killtcp->connections,
2490                         KILLTCP_KEYLEN, killtcp_key(&con->dst_addr, &con->src_addr),
2491                         add_killtcp_callback, con);
2492
2493         /* 
2494            If we dont have a socket to listen on yet we must create it
2495          */
2496         if (killtcp->capture_fd == -1) {
2497                 const char *iface = ctdb_vnn_iface_string(vnn);
2498                 killtcp->capture_fd = ctdb_sys_open_capture_socket(iface, &killtcp->private_data);
2499                 if (killtcp->capture_fd == -1) {
2500                         DEBUG(DEBUG_CRIT,(__location__ " Failed to open capturing "
2501                                           "socket on iface '%s' for killtcp (%s)\n",
2502                                           iface, strerror(errno)));
2503                         goto failed;
2504                 }
2505         }
2506
2507
2508         if (killtcp->fde == NULL) {
2509                 killtcp->fde = event_add_fd(ctdb->ev, killtcp, killtcp->capture_fd, 
2510                                             EVENT_FD_READ,
2511                                             capture_tcp_handler, killtcp);
2512                 tevent_fd_set_auto_close(killtcp->fde);
2513
2514                 /* We also need to set up some events to tickle all these connections
2515                    until they are all reset
2516                 */
2517                 event_add_timed(ctdb->ev, killtcp, timeval_current_ofs(1, 0), 
2518                                 ctdb_tickle_sentenced_connections, killtcp);
2519         }
2520
2521         /* tickle him once now */
2522         ctdb_sys_send_tcp(
2523                 &con->dst_addr,
2524                 &con->src_addr,
2525                 0, 0, 0);
2526
2527         return 0;
2528
2529 failed:
2530         talloc_free(vnn->killtcp);
2531         vnn->killtcp = NULL;
2532         return -1;
2533 }
2534
2535 /*
2536   kill a TCP connection.
2537  */
2538 int32_t ctdb_control_kill_tcp(struct ctdb_context *ctdb, TDB_DATA indata)
2539 {
2540         struct ctdb_control_killtcp *killtcp = (struct ctdb_control_killtcp *)indata.dptr;
2541
2542         return ctdb_killtcp_add_connection(ctdb, &killtcp->src_addr, &killtcp->dst_addr);
2543 }
2544
2545 /*
2546   called by a daemon to inform us of the entire list of TCP tickles for
2547   a particular public address.
2548   this control should only be sent by the node that is currently serving
2549   that public address.
2550  */
2551 int32_t ctdb_control_set_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata)
2552 {
2553         struct ctdb_control_tcp_tickle_list *list = (struct ctdb_control_tcp_tickle_list *)indata.dptr;
2554         struct ctdb_tcp_array *tcparray;
2555         struct ctdb_vnn *vnn;
2556
2557         /* We must at least have tickles.num or else we cant verify the size
2558            of the received data blob
2559          */
2560         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2561                                         tickles.connections)) {
2562                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list. Not enough data for the tickle.num field\n"));
2563                 return -1;
2564         }
2565
2566         /* verify that the size of data matches what we expect */
2567         if (indata.dsize < offsetof(struct ctdb_control_tcp_tickle_list, 
2568                                 tickles.connections)
2569                          + sizeof(struct ctdb_tcp_connection)
2570                                  * list->tickles.num) {
2571                 DEBUG(DEBUG_ERR,("Bad indata in ctdb_control_set_tcp_tickle_list\n"));
2572                 return -1;
2573         }       
2574
2575         vnn = find_public_ip_vnn(ctdb, &list->addr);
2576         if (vnn == NULL) {
2577                 DEBUG(DEBUG_INFO,(__location__ " Could not set tcp tickle list, '%s' is not a public address\n", 
2578                         ctdb_addr_to_str(&list->addr)));
2579
2580                 return 1;
2581         }
2582
2583         /* remove any old ticklelist we might have */
2584         talloc_free(vnn->tcp_array);
2585         vnn->tcp_array = NULL;
2586
2587         tcparray = talloc(ctdb->nodes, struct ctdb_tcp_array);
2588         CTDB_NO_MEMORY(ctdb, tcparray);
2589
2590         tcparray->num = list->tickles.num;
2591
2592         tcparray->connections = talloc_array(tcparray, struct ctdb_tcp_connection, tcparray->num);
2593         CTDB_NO_MEMORY(ctdb, tcparray->connections);
2594
2595         memcpy(tcparray->connections, &list->tickles.connections[0], 
2596                sizeof(struct ctdb_tcp_connection)*tcparray->num);
2597
2598         /* We now have a new fresh tickle list array for this vnn */
2599         vnn->tcp_array = talloc_steal(vnn, tcparray);
2600         
2601         return 0;
2602 }
2603
2604 /*
2605   called to return the full list of tickles for the puclic address associated 
2606   with the provided vnn
2607  */
2608 int32_t ctdb_control_get_tcp_tickle_list(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
2609 {
2610         ctdb_sock_addr *addr = (ctdb_sock_addr *)indata.dptr;
2611         struct ctdb_control_tcp_tickle_list *list;
2612         struct ctdb_tcp_array *tcparray;
2613         int num;
2614         struct ctdb_vnn *vnn;
2615
2616         vnn = find_public_ip_vnn(ctdb, addr);
2617         if (vnn == NULL) {
2618                 DEBUG(DEBUG_ERR,(__location__ " Could not get tcp tickle list, '%s' is not a public address\n", 
2619                         ctdb_addr_to_str(addr)));
2620
2621                 return 1;
2622         }
2623
2624         tcparray = vnn->tcp_array;
2625         if (tcparray) {
2626                 num = tcparray->num;
2627         } else {
2628                 num = 0;
2629         }
2630
2631         outdata->dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2632                                 tickles.connections)
2633                         + sizeof(struct ctdb_tcp_connection) * num;
2634
2635         outdata->dptr  = talloc_size(outdata, outdata->dsize);
2636         CTDB_NO_MEMORY(ctdb, outdata->dptr);
2637         list = (struct ctdb_control_tcp_tickle_list *)outdata->dptr;
2638
2639         list->addr = *addr;
2640         list->tickles.num = num;
2641         if (num) {
2642                 memcpy(&list->tickles.connections[0], tcparray->connections, 
2643                         sizeof(struct ctdb_tcp_connection) * num);
2644         }
2645
2646         return 0;
2647 }
2648
2649
2650 /*
2651   set the list of all tcp tickles for a public address
2652  */
2653 static int ctdb_ctrl_set_tcp_tickles(struct ctdb_context *ctdb, 
2654                               struct timeval timeout, uint32_t destnode, 
2655                               ctdb_sock_addr *addr,
2656                               struct ctdb_tcp_array *tcparray)
2657 {
2658         int ret, num;
2659         TDB_DATA data;
2660         struct ctdb_control_tcp_tickle_list *list;
2661
2662         if (tcparray) {
2663                 num = tcparray->num;
2664         } else {
2665                 num = 0;
2666         }
2667
2668         data.dsize = offsetof(struct ctdb_control_tcp_tickle_list, 
2669                                 tickles.connections) +
2670                         sizeof(struct ctdb_tcp_connection) * num;
2671         data.dptr = talloc_size(ctdb, data.dsize);
2672         CTDB_NO_MEMORY(ctdb, data.dptr);
2673
2674         list = (struct ctdb_control_tcp_tickle_list *)data.dptr;
2675         list->addr = *addr;
2676         list->tickles.num = num;
2677         if (tcparray) {
2678                 memcpy(&list->tickles.connections[0], tcparray->connections, sizeof(struct ctdb_tcp_connection) * num);
2679         }
2680
2681         ret = ctdb_daemon_send_control(ctdb, CTDB_BROADCAST_CONNECTED, 0, 
2682                                        CTDB_CONTROL_SET_TCP_TICKLE_LIST,
2683                                        0, CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL);
2684         if (ret != 0) {
2685                 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set tcp tickles failed\n"));
2686                 return -1;
2687         }
2688
2689         talloc_free(data.dptr);
2690
2691         return ret;
2692 }
2693
2694
2695 /*
2696   perform tickle updates if required
2697  */
2698 static void ctdb_update_tcp_tickles(struct event_context *ev, 
2699                                 struct timed_event *te, 
2700                                 struct timeval t, void *private_data)
2701 {
2702         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
2703         int ret;
2704         struct ctdb_vnn *vnn;
2705
2706         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2707                 /* we only send out updates for public addresses that 
2708                    we have taken over
2709                  */
2710                 if (ctdb->pnn != vnn->pnn) {
2711                         continue;
2712                 }
2713                 /* We only send out the updates if we need to */
2714                 if (!vnn->tcp_update_needed) {
2715                         continue;
2716                 }
2717                 ret = ctdb_ctrl_set_tcp_tickles(ctdb, 
2718                                 TAKEOVER_TIMEOUT(),
2719                                 CTDB_BROADCAST_CONNECTED,
2720                                 &vnn->public_address,
2721                                 vnn->tcp_array);
2722                 if (ret != 0) {
2723                         DEBUG(DEBUG_ERR,("Failed to send the tickle update for public address %s\n",
2724                                 ctdb_addr_to_str(&vnn->public_address)));
2725                 }
2726         }
2727
2728         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2729                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2730                              ctdb_update_tcp_tickles, ctdb);
2731 }               
2732         
2733
2734 /*
2735   start periodic update of tcp tickles
2736  */
2737 void ctdb_start_tcp_tickle_update(struct ctdb_context *ctdb)
2738 {
2739         ctdb->tickle_update_context = talloc_new(ctdb);
2740
2741         event_add_timed(ctdb->ev, ctdb->tickle_update_context,
2742                              timeval_current_ofs(ctdb->tunable.tickle_update_interval, 0), 
2743                              ctdb_update_tcp_tickles, ctdb);
2744 }
2745
2746
2747
2748
2749 struct control_gratious_arp {
2750         struct ctdb_context *ctdb;
2751         ctdb_sock_addr addr;
2752         const char *iface;
2753         int count;
2754 };
2755
2756 /*
2757   send a control_gratuitous arp
2758  */
2759 static void send_gratious_arp(struct event_context *ev, struct timed_event *te, 
2760                                   struct timeval t, void *private_data)
2761 {
2762         int ret;
2763         struct control_gratious_arp *arp = talloc_get_type(private_data, 
2764                                                         struct control_gratious_arp);
2765
2766         ret = ctdb_sys_send_arp(&arp->addr, arp->iface);
2767         if (ret != 0) {
2768                 DEBUG(DEBUG_ERR,(__location__ " sending of gratious arp on iface '%s' failed (%s)\n",
2769                                  arp->iface, strerror(errno)));
2770         }
2771
2772
2773         arp->count++;
2774         if (arp->count == CTDB_ARP_REPEAT) {
2775                 talloc_free(arp);
2776                 return;
2777         }
2778
2779         event_add_timed(arp->ctdb->ev, arp, 
2780                         timeval_current_ofs(CTDB_ARP_INTERVAL, 0), 
2781                         send_gratious_arp, arp);
2782 }
2783
2784
2785 /*
2786   send a gratious arp 
2787  */
2788 int32_t ctdb_control_send_gratious_arp(struct ctdb_context *ctdb, TDB_DATA indata)
2789 {
2790         struct ctdb_control_gratious_arp *gratious_arp = (struct ctdb_control_gratious_arp *)indata.dptr;
2791         struct control_gratious_arp *arp;
2792
2793         /* verify the size of indata */
2794         if (indata.dsize < offsetof(struct ctdb_control_gratious_arp, iface)) {
2795                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_gratious_arp structure. Got %u require %u bytes\n", 
2796                                  (unsigned)indata.dsize, 
2797                                  (unsigned)offsetof(struct ctdb_control_gratious_arp, iface)));
2798                 return -1;
2799         }
2800         if (indata.dsize != 
2801                 ( offsetof(struct ctdb_control_gratious_arp, iface)
2802                 + gratious_arp->len ) ){
2803
2804                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2805                         "but should be %u bytes\n", 
2806                          (unsigned)indata.dsize, 
2807                          (unsigned)(offsetof(struct ctdb_control_gratious_arp, iface)+gratious_arp->len)));
2808                 return -1;
2809         }
2810
2811
2812         arp = talloc(ctdb, struct control_gratious_arp);
2813         CTDB_NO_MEMORY(ctdb, arp);
2814
2815         arp->ctdb  = ctdb;
2816         arp->addr   = gratious_arp->addr;
2817         arp->iface = talloc_strdup(arp, gratious_arp->iface);
2818         CTDB_NO_MEMORY(ctdb, arp->iface);
2819         arp->count = 0;
2820         
2821         event_add_timed(arp->ctdb->ev, arp, 
2822                         timeval_zero(), send_gratious_arp, arp);
2823
2824         return 0;
2825 }
2826
2827 int32_t ctdb_control_add_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2828 {
2829         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2830         int ret;
2831
2832         /* verify the size of indata */
2833         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2834                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2835                 return -1;
2836         }
2837         if (indata.dsize != 
2838                 ( offsetof(struct ctdb_control_ip_iface, iface)
2839                 + pub->len ) ){
2840
2841                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2842                         "but should be %u bytes\n", 
2843                          (unsigned)indata.dsize, 
2844                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2845                 return -1;
2846         }
2847
2848         ret = ctdb_add_public_address(ctdb, &pub->addr, pub->mask, &pub->iface[0]);
2849
2850         if (ret != 0) {
2851                 DEBUG(DEBUG_ERR,(__location__ " Failed to add public address\n"));
2852                 return -1;
2853         }
2854
2855         return 0;
2856 }
2857
2858 /*
2859   called when releaseip event finishes for del_public_address
2860  */
2861 static void delete_ip_callback(struct ctdb_context *ctdb, int status, 
2862                                 void *private_data)
2863 {
2864         talloc_free(private_data);
2865 }
2866
2867 int32_t ctdb_control_del_public_address(struct ctdb_context *ctdb, TDB_DATA indata)
2868 {
2869         struct ctdb_control_ip_iface *pub = (struct ctdb_control_ip_iface *)indata.dptr;
2870         struct ctdb_vnn *vnn;
2871         int ret;
2872
2873         /* verify the size of indata */
2874         if (indata.dsize < offsetof(struct ctdb_control_ip_iface, iface)) {
2875                 DEBUG(DEBUG_ERR,(__location__ " Too small indata to hold a ctdb_control_ip_iface structure\n"));
2876                 return -1;
2877         }
2878         if (indata.dsize != 
2879                 ( offsetof(struct ctdb_control_ip_iface, iface)
2880                 + pub->len ) ){
2881
2882                 DEBUG(DEBUG_ERR,(__location__ " Wrong size of indata. Was %u bytes "
2883                         "but should be %u bytes\n", 
2884                          (unsigned)indata.dsize, 
2885                          (unsigned)(offsetof(struct ctdb_control_ip_iface, iface)+pub->len)));
2886                 return -1;
2887         }
2888
2889         /* walk over all public addresses until we find a match */
2890         for (vnn=ctdb->vnn;vnn;vnn=vnn->next) {
2891                 if (ctdb_same_ip(&vnn->public_address, &pub->addr)) {
2892                         TALLOC_CTX *mem_ctx;
2893
2894                         DLIST_REMOVE(ctdb->vnn, vnn);
2895                         if (vnn->iface == NULL) {
2896                                 talloc_free(vnn);
2897                                 return 0;
2898                         }
2899
2900                         mem_ctx = talloc_new(ctdb);
2901                         ret = ctdb_event_script_callback(ctdb, 
2902                                          mem_ctx, delete_ip_callback, mem_ctx,
2903                                          false,
2904                                          CTDB_EVENT_RELEASE_IP,
2905                                          "%s %s %u",
2906                                          ctdb_vnn_iface_string(vnn),
2907                                          ctdb_addr_to_str(&vnn->public_address),
2908                                          vnn->public_netmask_bits);
2909                         ctdb_vnn_unassign_iface(ctdb, vnn);
2910                         talloc_free(vnn);
2911                         if (ret != 0) {
2912                                 return -1;
2913                         }
2914                         return 0;
2915                 }
2916         }
2917
2918         return -1;
2919 }
2920
2921 /* This function is called from the recovery daemon to verify that a remote
2922    node has the expected ip allocation.
2923    This is verified against ctdb->ip_tree
2924 */
2925 int verify_remote_ip_allocation(struct ctdb_context *ctdb, struct ctdb_all_public_ips *ips)
2926 {
2927         struct ctdb_public_ip_list *tmp_ip; 
2928         int i;
2929
2930         if (ctdb->ip_tree == NULL) {
2931                 /* dont know the expected allocation yet, assume remote node
2932                    is correct. */
2933                 return 0;
2934         }
2935
2936         if (ips == NULL) {
2937                 return 0;
2938         }
2939
2940         for (i=0; i<ips->num; i++) {
2941                 tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ips->ips[i].addr));
2942                 if (tmp_ip == NULL) {
2943                         DEBUG(DEBUG_ERR,(__location__ " Could not find host for address %s, reassign ips\n", ctdb_addr_to_str(&ips->ips[i].addr)));
2944                         return -1;
2945                 }
2946
2947                 if (tmp_ip->pnn == -1 || ips->ips[i].pnn == -1) {
2948                         continue;
2949                 }
2950
2951                 if (tmp_ip->pnn != ips->ips[i].pnn) {
2952                         DEBUG(DEBUG_ERR,("Inconsistent ip allocation. Trigger reallocation. Thinks %s is held by node %u while it is held by node %u\n", ctdb_addr_to_str(&ips->ips[i].addr), ips->ips[i].pnn, tmp_ip->pnn));
2953                         return -1;
2954                 }
2955         }
2956
2957         return 0;
2958 }
2959
2960 int update_ip_assignment_tree(struct ctdb_context *ctdb, struct ctdb_public_ip *ip)
2961 {
2962         struct ctdb_public_ip_list *tmp_ip; 
2963
2964         if (ctdb->ip_tree == NULL) {
2965                 DEBUG(DEBUG_ERR,("No ctdb->ip_tree yet. Failed to update ip assignment\n"));
2966                 return -1;
2967         }
2968
2969         tmp_ip = trbt_lookuparray32(ctdb->ip_tree, IP_KEYLEN, ip_key(&ip->addr));
2970         if (tmp_ip == NULL) {
2971                 DEBUG(DEBUG_ERR,(__location__ " Could not find record for address %s, update ip\n", ctdb_addr_to_str(&ip->addr)));
2972                 return -1;
2973         }
2974
2975         DEBUG(DEBUG_NOTICE,("Updated ip assignment tree for ip : %s from node %u to node %u\n", ctdb_addr_to_str(&ip->addr), tmp_ip->pnn, ip->pnn));
2976         tmp_ip->pnn = ip->pnn;
2977
2978         return 0;
2979 }