4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "system/filesys.h"
22 #include "system/wait.h"
23 #include "system/dir.h"
24 #include "system/locale.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/events/events.h"
27 #include "../common/rb_tree.h"
31 const char *script_running;
35 ctdbd sends us a SIGTERM when we should time out the current script
/*
  SIGTERM handler, installed with signal() in ctdb_event_script_v.
  Logs the script recorded in child_state.script_running together with
  the time elapsed since child_state.start, then sends SIGKILL to the
  whole process group of the calling process (negative pid argument).
  The sig argument is not used by the visible statements.
 */
37 static void sigterm(int sig)
39 DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds\n",
40 child_state.script_running, timeval_elapsed(&child_state.start)));
41 /* all the child processes will be running in the same process group */
42 kill(-getpgrp(), SIGKILL);
/*
  State kept by the parent for one background run of the event scripts.
  NOTE(review): only some members are visible in this listing; other
  functions in this file also use private_data, options, fd and child.
 */
46 struct ctdb_event_script_state {
/* context handed back to the callback */
47 struct ctdb_context *ctdb;
/* completion callback, invoked as callback(ctdb, status, private_data) */
49 void (*callback)(struct ctdb_context *, int, void *);
56 run the event scripts for the given options string
57 this function is called and run in the context of a forked child
58 which allows it to do blocking calls such as system()
/*
  Run the event scripts found in ctdb->event_script_dir, passing each
  one the given options string.

  Visible steps:
   - when ctdb->recovery_mode is not CTDB_RECOVERY_NORMAL, options is
     compared against the allowed_scripts list and anything else is
     refused with a log message
   - setpgid(0,0) puts the process in its own process group and the
     sigterm handler is installed for SIGTERM
   - directory entries are filtered (names ending in a tilde, names
     without a dot as third character and names that do not start with
     a number are skipped), checked for the S_IXUSR bit, and stored in
     a tree keyed by the numeric prefix; two scripts with the same
     prefix are reported as a configuration error
   - the stored scripts are fetched with trbt_findfirstarray32 and run
     through system() as "<dir>/<script> <options>"; the wait status is
     converted with WEXITSTATUS and a failure is logged

  child_state.start and child_state.script_running are updated before
  each script so that sigterm can report what was running.

  NOTE(review): the return statements are not visible in this listing,
  so the exact return values are not documented here.
  NOTE(review): the command line goes through system(), so the options
  text is interpreted by the shell - confirm that callers only pass
  trusted strings.
 */
60 static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
65 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
/* outside normal recovery mode only the options listed below are accepted */
71 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
72 /* we guarantee that only some specifically allowed event scripts are run
74 const char *allowed_scripts[] = {"startrecovery", "shutdown" };
76 for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
77 if (strcmp(options, allowed_scripts[i]) == 0) break;
79 if (i == ARRAY_SIZE(allowed_scripts)) {
80 DEBUG(0,("Refusing to run event scripts with option '%s' while in recovery\n",
86 if (setpgid(0,0) != 0) {
87 DEBUG(DEBUG_ERR,("Failed to create process group for event scripts - %s\n",
93 signal(SIGTERM, sigterm);
95 child_state.start = timeval_current();
96 child_state.script_running = "startup";
99 the service specific event scripts
101 if (stat(ctdb->event_script_dir, &st) != 0 &&
103 DEBUG(DEBUG_CRIT,("No event script directory found at '%s'\n", ctdb->event_script_dir));
104 talloc_free(tmp_ctx);
108 /* create a tree to store all the script names in */
109 tree = trbt_create(tmp_ctx, 0);
111 /* scan all directory entries and insert all valid scripts into the
114 dir = opendir(ctdb->event_script_dir);
116 DEBUG(DEBUG_CRIT,("Failed to open event script directory '%s'\n", ctdb->event_script_dir));
117 talloc_free(tmp_ctx);
121 while ((de=readdir(dir)) != NULL) {
126 namlen = strlen(de->d_name);
132 if (de->d_name[namlen-1] == '~') {
133 /* skip files emacs left behind */
/* the third character of the name has to be a dot */
137 if (de->d_name[2] != '.') {
/* parse the leading number; it is used as the key in the tree below */
141 if (sscanf(de->d_name, "%02u.", &num) != 1) {
145 /* Make sure the event script is executable */
/* NOTE(review): str is passed to stat() without a NULL check on the allocation */
146 str = talloc_asprintf(tree, "%s/%s", ctdb->event_script_dir, de->d_name);
147 if (stat(str, &st) != 0) {
148 DEBUG(DEBUG_ERR,("Could not stat event script %s. Ignoring this event script\n", str));
151 if (!(st.st_mode & S_IXUSR)) {
152 DEBUG(DEBUG_ERR,("Event script %s is not executable. Ignoring this event script\n", str));
157 /* store the event script in the tree */
158 script = trbt_insert32(tree, num, talloc_strdup(tree, de->d_name));
/* a non-NULL result is reported as a clash with an already stored script */
159 if (script != NULL) {
160 DEBUG(DEBUG_CRIT,("CONFIG ERROR: Multiple event scripts with the same prefix : '%s' and '%s'. Each event script MUST have a unique prefix\n", script, de->d_name));
161 talloc_free(tmp_ctx);
168 /* fetch the scripts from the tree one by one and execute
171 while ((script=trbt_findfirstarray32(tree, 1)) != NULL) {
172 cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s",
173 ctdb->event_script_dir,
175 CTDB_NO_MEMORY(ctdb, cmdstr);
177 DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
179 child_state.start = timeval_current();
180 child_state.script_running = cmdstr;
182 ret = system(cmdstr);
183 /* if the system() call was successful, translate ret into the
184 return code from the command
187 ret = WEXITSTATUS(ret);
189 /* return an error if the script failed */
191 DEBUG(DEBUG_ERR,("Event script %s failed with error %d\n", cmdstr, ret));
192 talloc_free(tmp_ctx);
196 /* remove this script from the tree */
/* the loop is over; record that for the message logged by sigterm */
200 child_state.start = timeval_current();
201 child_state.script_running = "finished";
203 talloc_free(tmp_ctx);
207 /* called when child is finished */
/*
  fd event handler registered by ctdb_event_script_callback_v on the
  read end of the pipe. p is the struct ctdb_event_script_state of the
  run. It reads the result value rt from state->fd[0], clears the talloc
  destructor on state, invokes the stored callback with that result and
  resets ctdb->event_script_timeouts to 0.
  The ev, fde and flags arguments are not used by the visible statements.
 */
208 static void ctdb_event_script_handler(struct event_context *ev, struct fd_event *fde,
209 uint16_t flags, void *p)
211 struct ctdb_event_script_state *state =
212 talloc_get_type(p, struct ctdb_event_script_state);
/* callback, private_data and ctdb are copied into locals before state is touched further */
213 void (*callback)(struct ctdb_context *, int, void *) = state->callback;
214 void *private_data = state->private_data;
215 struct ctdb_context *ctdb = state->ctdb;
/* NOTE(review): the return value of read() is ignored; on a short or failed read rt keeps whatever value it had before (its declaration is not visible here) */
218 read(state->fd[0], &rt, sizeof(rt));
/* with the destructor cleared, releasing state no longer sends SIGTERM to the child */
220 talloc_set_destructor(state, NULL);
222 callback(ctdb, rt, private_data);
224 ctdb->event_script_timeouts = 0;
/*
  Send a ban message with the given ban period.
  A struct ctdb_ban_info with ban_time set to ban_period is wrapped in a
  data blob and sent with ctdb_daemon_send_message to
  CTDB_BROADCAST_CONNECTED under CTDB_SRVID_BAN_NODE. A failure to send
  is only logged.
  NOTE(review): any other fields of b are presumably filled in on lines
  that are not visible in this listing - verify.
 */
227 static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
230 struct ctdb_ban_info b;
234 b.ban_time = ban_period;
236 data.dptr = (uint8_t *)&b;
237 data.dsize = sizeof(b);
239 ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
240 CTDB_SRVID_BAN_NODE, data);
242 DEBUG(DEBUG_ERR,(__location__ " Failed to send ban message\n"));
247 /* called when child times out */
/*
  Timed event handler registered by ctdb_event_script_callback_v; p is
  the struct ctdb_event_script_state of the run that timed out.

  The reaction depends on state->options:
   - "monitor": ctdb->event_script_timeouts is incremented. Once it
     exceeds ctdb->tunable.script_ban_count the counter is reset, the
     node bans itself for tunable.recovery_ban_period and the callback
     gets -1. The callback is also invoked with 0 in this branch,
     presumably when the limit has not been reached yet (the else line
     is not visible in this listing - verify).
   - "startup": the callback gets -1, no ban is sent.
   - anything else: the node bans itself immediately and the callback
     gets -1.

  The ev, te and t arguments are not used by the visible statements.
 */
248 static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te,
249 struct timeval t, void *p)
251 struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
252 void (*callback)(struct ctdb_context *, int, void *) = state->callback;
253 void *private_data = state->private_data;
254 struct ctdb_context *ctdb = state->ctdb;
256 DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
258 if (!strcmp(state->options, "monitor")) {
259 /* if it is a monitor event, we allow it to "hang" a few times
260 before we declare it a failure and ban ourself (and make
263 DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
265 ctdb->event_script_timeouts++;
266 if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
267 ctdb->event_script_timeouts = 0;
268 DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
269 ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
270 callback(ctdb, -1, private_data);
272 callback(ctdb, 0, private_data);
274 } else if (!strcmp(state->options, "startup")) {
275 DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
276 callback(ctdb, -1, private_data);
278 /* if it is not a monitor event we ban ourself immediately */
279 DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
280 ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
281 callback(ctdb, -1, private_data);
288 destroy a running event script
/*
  talloc destructor set on the state by ctdb_event_script_callback_v.
  Logs the child pid and sends SIGTERM to state->child; the child
  installs the sigterm handler for that signal in ctdb_event_script_v.
 */
290 static int event_script_destructor(struct ctdb_event_script_state *state)
292 DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
293 kill(state->child, SIGTERM);
298 run the event script in the background, calling the callback when
/*
  Start the event scripts in a forked child and arrange for callback to
  be invoked with the result.

  ctdb      daemon context; its event context ctdb->ev gets the handlers
  timeout   passed to event_add_timed; a zero value means no timer
  callback  stored in the state and invoked by the fd or timeout handler
  fmt, ap   printf style format and arguments that produce the options
            string handed to ctdb_event_script_v

  The state is allocated with talloc on mem_ctx. Two further parameters
  (mem_ctx and private_data, judging by their use below) are declared on
  lines that are not visible in this listing.
  NOTE(review): the return statements are not visible either, so the
  return values are not documented here.
 */
301 static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
302 struct timeval timeout,
304 void (*callback)(struct ctdb_context *, int, void *),
306 const char *fmt, va_list ap)
308 struct ctdb_event_script_state *state;
311 state = talloc(mem_ctx, struct ctdb_event_script_state);
312 CTDB_NO_MEMORY(ctdb, state);
315 state->callback = callback;
316 state->private_data = private_data;
/* the options string is allocated as a talloc child of state */
317 state->options = talloc_vasprintf(state, fmt, ap);
318 CTDB_NO_MEMORY(ctdb, state->options);
/* pipe over which the child reports its result to the parent */
320 ret = pipe(state->fd);
326 state->child = fork();
328 if (state->child == (pid_t)-1) {
/* child: run the scripts and write the result to the write end of the pipe */
335 if (state->child == 0) {
339 if (ctdb->do_setsched) {
340 ctdb_restore_scheduler(ctdb);
342 set_close_on_exec(state->fd[1]);
343 rt = ctdb_event_script_v(ctdb, state->options);
/* keep trying until the whole value has been written */
344 while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
/* parent: from here on, freeing state sends SIGTERM to the child */
350 talloc_set_destructor(state, event_script_destructor);
/* ctdb_event_script_handler runs once the read end becomes readable */
354 event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
355 ctdb_event_script_handler, state);
/* a timer is only armed for a non-zero timeout; the other case is just logged */
357 if (!timeval_is_zero(&timeout)) {
358 event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
360 DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
368 run the event script in the background, calling the callback when
/*
  Variadic front end for ctdb_event_script_callback_v: forwards ctdb,
  timeout, mem_ctx, callback, private_data and the format arguments.
  fmt is a printf style format string, so callers must not pass
  untrusted text in that position.
  NOTE(review): the va_start and va_end calls and the return statement
  are not visible in this listing.
 */
371 int ctdb_event_script_callback(struct ctdb_context *ctdb,
372 struct timeval timeout,
374 void (*callback)(struct ctdb_context *, int, void *),
376 const char *fmt, ...)
382 ret = ctdb_event_script_callback_v(ctdb, timeout, mem_ctx, callback, private_data, fmt, ap);
/*
  Result slot shared between ctdb_event_script and event_script_callback.
  NOTE(review): the members are not visible in this listing;
  ctdb_event_script reads the fields done and status.
 */
389 struct callback_status {
395 called when ctdb_event_script() finishes
/*
  Completion callback passed by ctdb_event_script; private_data is the
  struct callback_status owned by that caller.
  NOTE(review): the statements that fill in the structure are not
  visible in this listing; ctdb_event_script polls its done field and
  returns its status field.
 */
397 static void event_script_callback(struct ctdb_context *ctdb, int status, void *private_data)
399 struct callback_status *s = (struct callback_status *)private_data;
405 run the event script, waiting for it to complete. Used when the caller doesn't want to
406 continue till the event script has finished.
/*
  Run the event scripts and block until they have completed.
  The run is started through ctdb_event_script_callback_v with a zero
  timeout (so no timer is armed) and event_script_callback as the
  completion callback. The function then spins event_loop_once on
  ctdb->ev until status.done is set or the loop call returns non-zero,
  frees tmp_ctx and returns status.status.
  NOTE(review): the handling of a failed start is only partly visible
  in this listing (tmp_ctx is freed on that path).
 */
408 int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
/* the script state is allocated on tmp_ctx, so freeing tmp_ctx releases it */
412 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
413 struct callback_status status;
416 ret = ctdb_event_script_callback_v(ctdb, timeval_zero(), tmp_ctx, event_script_callback, &status, fmt, ap);
420 talloc_free(tmp_ctx);
/* pump the event loop until the callback has reported completion */
427 while (status.done == false && event_loop_once(ctdb->ev) == 0) /* noop */;
429 talloc_free(tmp_ctx);
431 return status.status;
/*
  State carried from ctdb_run_eventscripts to run_eventscripts_callback.
 */
435 struct eventscript_callback_state {
/* the control request that run_eventscripts_callback replies to */
436 struct ctdb_req_control *c;
440 called when the forced run of the eventscripts finishes
/*
  Completion callback for the run started by ctdb_run_eventscripts.
  Re-enables monitoring, then answers the stored control request: with
  the script status and a log message on the failure path, with 0
  otherwise.
  NOTE(review): the condition that selects the failure path is not
  visible in this listing.
 */
442 static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
445 struct eventscript_callback_state *state =
446 talloc_get_type(private_data, struct eventscript_callback_state);
/* monitoring was disabled by ctdb_run_eventscripts before the run was started */
448 ctdb_enable_monitoring(ctdb);
451 DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
452 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
457 /* the control succeeded */
458 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
464 A control to force running of the eventscripts from the ctdb client tool
/*
  Control handler that forces a run of the event scripts.
  indata.dptr carries the argument text for the scripts. A previous
  forced run is cancelled by freeing ctdb->eventscripts_ctx, a new
  context and callback state are allocated, the request c is moved onto
  ctdb with talloc_steal, monitoring is disabled and the run is started
  with a timeout of ctdb->tunable.script_timeout seconds. The reply is
  sent later from run_eventscripts_callback.

  NOTE(review): indata.dptr is passed in the fmt position of
  ctdb_event_script_callback, so conversion specifiers in client
  supplied data are interpreted as a format string. Passing "%s" with
  the data as its argument would avoid that - confirm and fix.
  NOTE(review): indata.dptr is also printed with %s; this assumes the
  buffer is NUL terminated, which is not checked in the visible code.
  NOTE(review): the end of the function, including the return
  statements and the use of async_reply, is not visible in this listing.
 */
466 int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
467 struct ctdb_req_control *c,
468 TDB_DATA indata, bool *async_reply)
471 struct eventscript_callback_state *state;
473 /* kill off any previous invocations of forced eventscripts */
474 if (ctdb->eventscripts_ctx) {
475 talloc_free(ctdb->eventscripts_ctx);
477 ctdb->eventscripts_ctx = talloc_new(ctdb);
478 CTDB_NO_MEMORY(ctdb, ctdb->eventscripts_ctx);
480 state = talloc(ctdb->eventscripts_ctx, struct eventscript_callback_state);
481 CTDB_NO_MEMORY(ctdb, state);
/* the request is reparented onto ctdb and kept in the state for the later reply */
483 state->c = talloc_steal(ctdb, c);
485 DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
/* monitoring is enabled again in run_eventscripts_callback or on the failure path below */
487 ctdb_disable_monitoring(ctdb);
489 ret = ctdb_event_script_callback(ctdb,
490 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
491 state, run_eventscripts_callback, state,
492 (const char *)indata.dptr);
495 ctdb_enable_monitoring(ctdb);
496 DEBUG(DEBUG_ERR,(__location__ " Failed to run eventscripts with arguments %s\n", indata.dptr));
501 /* tell ctdb_control.c that we will be replying asynchronously */