rename ctdb.h to ctdb_protocol.h
[sahlberg/ctdb.git] / server / ctdb_keepalive.c
1 /* 
2    monitoring links to all other nodes to detect dead nodes
3
4
5    Copyright (C) Ronnie Sahlberg 2007
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "includes.h"
22 #include "lib/events/events.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "include/ctdb_protocol.h"
26 #include "include/ctdb_private.h"
27
28
29 /*
30   see if any nodes are dead
31  */
32 static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, 
33                                       struct timeval t, void *private_data)
34 {
35         struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
36         int i;
37
38         /* send a keepalive to all other nodes, unless */
39         for (i=0;i<ctdb->num_nodes;i++) {
40                 struct ctdb_node *node = ctdb->nodes[i];
41
42                 if (node->flags & NODE_FLAGS_DELETED) {
43                         continue;
44                 }
45
46                 if (node->pnn == ctdb->pnn) {
47                         continue;
48                 }
49                 
50                 if (node->flags & NODE_FLAGS_DISCONNECTED) {
51                         /* it might have come alive again */
52                         if (node->rx_cnt != 0) {
53                                 ctdb_node_connected(node);
54                         }
55                         continue;
56                 }
57
58
59                 if (node->rx_cnt == 0) {
60                         node->dead_count++;
61                 } else {
62                         node->dead_count = 0;
63                 }
64
65                 node->rx_cnt = 0;
66
67                 if (node->dead_count >= ctdb->tunable.keepalive_limit) {
68                         DEBUG(DEBUG_NOTICE,("dead count reached for node %u\n", node->pnn));
69                         ctdb_node_dead(node);
70                         ctdb_send_keepalive(ctdb, node->pnn);
71                         /* maybe tell the transport layer to kill the
72                            sockets as well?
73                         */
74                         continue;
75                 }
76                 
77                 DEBUG(DEBUG_DEBUG,("sending keepalive to %u\n", node->pnn));
78                 ctdb_send_keepalive(ctdb, node->pnn);
79
80                 node->tx_cnt = 0;
81         }
82         
83         event_add_timed(ctdb->ev, ctdb->keepalive_ctx,
84                         timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
85                         ctdb_check_for_dead_nodes, ctdb);
86 }
87
88
89 void ctdb_start_keepalive(struct ctdb_context *ctdb)
90 {
91         struct timed_event *te;
92
93         ctdb->keepalive_ctx = talloc_new(ctdb);
94         CTDB_NO_MEMORY_FATAL(ctdb, ctdb->keepalive_ctx);
95
96         te = event_add_timed(ctdb->ev, ctdb->keepalive_ctx,
97                              timeval_current_ofs(ctdb->tunable.keepalive_interval, 0), 
98                              ctdb_check_for_dead_nodes, ctdb);
99         CTDB_NO_MEMORY_FATAL(ctdb, te);
100
101         DEBUG(DEBUG_NOTICE,("Keepalive monitoring has been started\n"));
102 }
103
104 void ctdb_stop_keepalive(struct ctdb_context *ctdb)
105 {
106         talloc_free(ctdb->keepalive_ctx);
107         ctdb->keepalive_ctx = NULL;
108 }
109