4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "system/filesys.h"
22 #include "system/wait.h"
23 #include "system/dir.h"
24 #include "system/locale.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/events/events.h"
27 #include "../common/rb_tree.h"
/* Label or command line of the script currently being run. It is read as
 * child_state.script_running by the SIGTERM handler to report which script
 * timed out. NOTE(review): the enclosing declaration is not visible here;
 * presumably this is a member of child_state - confirm. */
31 const char *script_running;
35 ctdbd sends us a SIGTERM when we should time out the current script
/*
 * SIGTERM handler installed by ctdb_event_script_v() in the forked child.
 * Logs which script was running and for how long (both taken from
 * child_state), then sends SIGKILL to the calling process group.
 *
 * sig: signal number; not used by the visible lines.
 *
 * NOTE(review): DEBUG() is invoked from signal context; confirm that the
 * logging path is safe to call from a signal handler.
 */
37 static void sigterm(int sig)
39 DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds\n",
40 child_state.script_running, timeval_elapsed(&child_state.start)));
41 /* all the child processes will be running in the same process group */
/* a negative pid makes kill() signal every process in that group */
42 kill(-getpgrp(), SIGKILL);
/*
 * Per-invocation state for an event script run started by
 * ctdb_event_script_callback_v(). Only some members are visible in this
 * view; the file also accesses state->child, state->fd,
 * state->private_data and state->options.
 */
46 struct ctdb_event_script_state {
/* daemon context the run belongs to */
47 struct ctdb_context *ctdb;
/* completion callback, invoked as callback(ctdb, status, private_data) */
49 void (*callback)(struct ctdb_context *, int, void *);
56 run the event scripts - takes the already formatted options string
57 this function is called and run in the context of a forked child
58 which allows it to do blocking calls such as system()
/*
 * Run the event scripts found in ctdb->event_script_dir for one event.
 * Called in a forked child (see ctdb_event_script_callback_v), so blocking
 * calls such as system() are acceptable here.
 *
 * ctdb:    daemon context; supplies recovery_mode and event_script_dir.
 * options: formatted option string. It is compared against the allowed
 *          event names while in recovery. Presumably it is also appended
 *          to each script command line - the argument list of the command
 *          format is only partly visible here, confirm.
 *
 * Steps visible in this view:
 *  - outside CTDB_RECOVERY_NORMAL only "startrecovery" and "shutdown" are
 *    allowed; any other option string is logged as refused;
 *  - setpgid(0,0) puts the process into its own process group and
 *    sigterm() is installed for SIGTERM, so that a timeout can kill
 *    everything the scripts spawned;
 *  - directory entries are filtered (no trailing '~', '.' as third
 *    character, a leading number, stat() succeeds, owner-executable) and
 *    stored in a tree keyed by the number; two scripts with the same
 *    number are reported as a CONFIG ERROR;
 *  - scripts are fetched from the tree one at a time and run through
 *    system(); a failing script is logged and tmp_ctx is freed.
 *
 * NOTE(review): the return statements are on lines not visible here -
 * confirm the return values before relying on them.
 * NOTE(review): the command is run through system(), so the option string
 * is presumably interpreted by the shell - confirm callers never pass
 * untrusted text.
 */
60 static int ctdb_event_script_v(struct ctdb_context *ctdb, const char *options)
/* scratch talloc context; the tree and the command strings hang off it */
65 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
/* while not in normal recovery mode only whitelisted events may run */
71 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
72 /* we guarantee that only some specifically allowed event scripts are run
74 const char *allowed_scripts[] = {"startrecovery", "shutdown" };
76 for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) {
77 if (strcmp(options, allowed_scripts[i]) == 0) break;
79 if (i == ARRAY_SIZE(allowed_scripts)) {
80 DEBUG(0,("Refusing to run event scripts with option '%s' while in recovery\n",
86 if (setpgid(0,0) != 0) {
87 DEBUG(DEBUG_ERR,("Failed to create process group for event scripts - %s\n",
93 signal(SIGTERM, sigterm);
95 child_state.start = timeval_current();
96 child_state.script_running = "startup";
99 the service specific event scripts
101 if (stat(ctdb->event_script_dir, &st) != 0 &&
103 DEBUG(DEBUG_CRIT,("No event script directory found at '%s'\n", ctdb->event_script_dir));
104 talloc_free(tmp_ctx);
108 /* create a tree to store all the script names in */
/* the tree is created on tmp_ctx */
109 tree = trbt_create(tmp_ctx, 0);
111 /* scan all directory entries and insert all valid scripts into the
114 dir = opendir(ctdb->event_script_dir);
116 DEBUG(DEBUG_CRIT,("Failed to open event script directory '%s'\n", ctdb->event_script_dir));
117 talloc_free(tmp_ctx);
121 while ((de=readdir(dir)) != NULL) {
126 namlen = strlen(de->d_name);
132 if (de->d_name[namlen-1] == '~') {
133 /* skip files emacs left behind */
/* the third character of the name must be '.', as in "NN.name" */
137 if (de->d_name[2] != '.') {
/* parse the numeric prefix; it becomes the key in the tree */
141 if (sscanf(de->d_name, "%02u.", &num) != 1) {
145 /* Make sure the event script is executable */
/* NOTE(review): str is allocated on the tree and is passed to stat()
 * without a NULL check */
146 str = talloc_asprintf(tree, "%s/%s", ctdb->event_script_dir, de->d_name);
147 if (stat(str, &st) != 0) {
148 DEBUG(DEBUG_ERR,("Could not stat event script %s. Ignoring this event script\n", str));
/* only the owner-execute permission bit is tested */
151 if (!(st.st_mode & S_IXUSR)) {
152 DEBUG(DEBUG_ERR,("Event script %s is not executable. Ignoring this event script\n", str));
157 /* store the event script in the tree */
/* NOTE(review): the talloc_strdup() result is inserted without a NULL
 * check */
158 script = trbt_insert32(tree, num, talloc_strdup(tree, de->d_name));
/* a non-NULL return is treated as a clash with an already stored script */
159 if (script != NULL) {
160 DEBUG(DEBUG_CRIT,("CONFIG ERROR: Multiple event scripts with the same prefix : '%s' and '%s'. Each event script MUST have a unique prefix\n", script, de->d_name));
161 talloc_free(tmp_ctx);
168 /* fetch the scripts from the tree one by one and execute
171 while ((script=trbt_findfirstarray32(tree, 1)) != NULL) {
172 cmdstr = talloc_asprintf(tmp_ctx, "%s/%s %s",
173 ctdb->event_script_dir,
175 CTDB_NO_MEMORY(ctdb, cmdstr);
177 DEBUG(DEBUG_INFO,("Executing event script %s\n",cmdstr));
179 child_state.start = timeval_current();
180 child_state.script_running = cmdstr;
182 ret = system(cmdstr);
183 /* if the system() call was successful, translate ret into the
184 return code from the command
187 ret = WEXITSTATUS(ret);
189 /* return an error if the script failed */
/* NOTE(review): the condition guarding this failure path is not visible
 * here */
191 DEBUG(DEBUG_ERR,("Event script %s failed with error %d\n", cmdstr, ret));
192 talloc_free(tmp_ctx);
196 /* remove this script from the tree */
/* final marker for the SIGTERM handler's log message */
200 child_state.start = timeval_current();
201 child_state.script_running = "finished";
203 talloc_free(tmp_ctx);
207 /* called when child is finished */
/*
 * fd event handler registered by ctdb_event_script_callback_v(). It runs
 * in the parent when the read end of the result pipe becomes readable,
 * reads the status written by the child and passes it to the callback.
 *
 * ev, fde, flags: event-loop arguments; not used by the visible lines.
 * p: the struct ctdb_event_script_state registered with event_add_fd().
 */
208 static void ctdb_event_script_handler(struct event_context *ev, struct fd_event *fde,
209 uint16_t flags, void *p)
211 struct ctdb_event_script_state *state =
212 talloc_get_type(p, struct ctdb_event_script_state);
/* copy out everything needed below. NOTE(review): presumably state is
 * freed on a line not visible here, before the callback runs - confirm */
213 void (*callback)(struct ctdb_context *, int, void *) = state->callback;
214 void *private_data = state->private_data;
215 struct ctdb_context *ctdb = state->ctdb;
/* NOTE(review): the return value of read() is ignored. If nothing is read,
 * rt keeps whatever value it had before - its declaration and initialiser
 * are not visible here, confirm */
218 read(state->fd[0], &rt, sizeof(rt));
/* clear the destructor so that releasing state does not send SIGTERM to
 * the child */
220 talloc_set_destructor(state, NULL);
222 callback(ctdb, rt, private_data);
/* reset the counter that ctdb_event_script_timeout() increments */
224 ctdb->event_script_timeouts = 0;
/*
 * Send a CTDB_SRVID_BAN_NODE message carrying a struct ctdb_ban_info to
 * CTDB_BROADCAST_CONNECTED; a failure to send is only logged.
 *
 * ctdb:       daemon context used for sending.
 * ban_period: stored as the ban_time of the message; callers pass
 *             tunable.recovery_ban_period and log it as seconds.
 *
 * NOTE(review): the line that identifies which node to ban is not visible
 * here; presumably it names this node - confirm.
 */
227 static void ctdb_ban_self(struct ctdb_context *ctdb, uint32_t ban_period)
230 struct ctdb_ban_info b;
234 b.ban_time = ban_period;
/* the message payload is the raw bytes of the ban structure */
236 data.dptr = (uint8_t *)&b;
237 data.dsize = sizeof(b);
239 ret = ctdb_daemon_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
240 CTDB_SRVID_BAN_NODE, data);
242 DEBUG(DEBUG_ERR,(__location__ " Failed to send ban message\n"));
247 /* called when child times out */
/*
 * Timed event handler registered by ctdb_event_script_callback_v(). It
 * runs in the parent when the timeout for a script run expires.
 *
 * ev, te, t: event-loop arguments; not used by the visible lines.
 * p: the struct ctdb_event_script_state registered with event_add_timed().
 *
 * Policy visible below, keyed on an exact match of the option string:
 *  - "monitor": count the timeout; once the count exceeds
 *    tunable.script_ban_count, reset it, ban this node and report -1.
 *    Otherwise (presumably the else branch, not visible here) report 0;
 *  - "startup": report -1 without banning;
 *  - anything else: ban this node immediately and report -1.
 */
248 static void ctdb_event_script_timeout(struct event_context *ev, struct timed_event *te,
249 struct timeval t, void *p)
251 struct ctdb_event_script_state *state = talloc_get_type(p, struct ctdb_event_script_state);
252 void (*callback)(struct ctdb_context *, int, void *) = state->callback;
253 void *private_data = state->private_data;
254 struct ctdb_context *ctdb = state->ctdb;
257 DEBUG(DEBUG_ERR,("Event script timed out : %s count : %u\n", state->options, ctdb->event_script_timeouts));
/* private copy of the option string, owned by ctdb until freed below.
 * NOTE(review): presumably state is released on a line not visible here,
 * which is why the string is duplicated first - confirm */
259 options = talloc_strdup(ctdb, state->options);
260 CTDB_NO_MEMORY_VOID(ctdb, options);
/* strcmp() only matches when the option string is exactly "monitor" */
263 if (!strcmp(options, "monitor")) {
264 /* if it is a monitor event, we allow it to "hang" a few times
265 before we declare it a failure and ban ourself (and make
268 DEBUG(DEBUG_ERR, (__location__ " eventscript for monitor event timedout.\n"));
270 ctdb->event_script_timeouts++;
271 if (ctdb->event_script_timeouts > ctdb->tunable.script_ban_count) {
272 ctdb->event_script_timeouts = 0;
273 DEBUG(DEBUG_ERR, ("Maximum timeout count %u reached for eventscript. Banning self for %d seconds\n", ctdb->tunable.script_ban_count, ctdb->tunable.recovery_ban_period));
274 ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
275 callback(ctdb, -1, private_data);
277 callback(ctdb, 0, private_data);
279 } else if (!strcmp(options, "startup")) {
280 DEBUG(DEBUG_ERR, (__location__ " eventscript for startup event timedout.\n"));
281 callback(ctdb, -1, private_data);
283 /* if it is not a monitor event we ban ourself immediately */
284 DEBUG(DEBUG_ERR, (__location__ " eventscript for NON-monitor/NON-startup event timedout. Immediately banning ourself for %d seconds\n", ctdb->tunable.recovery_ban_period));
285 ctdb_ban_self(ctdb, ctdb->tunable.recovery_ban_period);
286 callback(ctdb, -1, private_data);
/* release the private copy made above */
289 talloc_free(options);
293 destroy a running event script
/*
 * talloc destructor for a running event script, installed by
 * ctdb_event_script_callback_v() in the parent. Logs and sends SIGTERM to
 * the child; the child's sigterm() handler then kills the scripts' process
 * group.
 *
 * state: the script state being destroyed; state->child is the child pid.
 *
 * NOTE(review): the return statement is not visible here.
 */
295 static int event_script_destructor(struct ctdb_event_script_state *state)
297 DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
298 kill(state->child, SIGTERM);
303 run the event script in the background, calling the callback when finished
/*
 * Fork a child that runs the event scripts and arrange for `callback` to
 * be invoked in the parent when the child reports its result or the
 * timeout expires.
 *
 * ctdb:     daemon context; supplies the event loop (ctdb->ev).
 * timeout:  passed to event_add_timed(); a zero timeval means no timed
 *           event is armed.
 * callback: completion callback, stored in the state.
 * fmt, ap:  printf-style format and arguments that produce the option
 *           string handed to ctdb_event_script_v().
 *
 * The body also uses mem_ctx (talloc parent of the state) and private_data
 * (handed back to the callback); their parameter declarations are on lines
 * not visible here.
 *
 * NOTE(review): error handling for pipe() and fork() and the return
 * statements are on lines not visible here.
 */
306 static int ctdb_event_script_callback_v(struct ctdb_context *ctdb,
307 struct timeval timeout,
309 void (*callback)(struct ctdb_context *, int, void *),
311 const char *fmt, va_list ap)
313 struct ctdb_event_script_state *state;
/* the state is a talloc child of mem_ctx */
316 state = talloc(mem_ctx, struct ctdb_event_script_state);
317 CTDB_NO_MEMORY(ctdb, state);
320 state->callback = callback;
321 state->private_data = private_data;
/* the option string is owned by the state */
322 state->options = talloc_vasprintf(state, fmt, ap);
323 CTDB_NO_MEMORY(ctdb, state->options);
/* pipe the child uses to send its status back: fd[1] is written by the
 * child, fd[0] is read by ctdb_event_script_handler() */
325 ret = pipe(state->fd);
331 state->child = fork();
333 if (state->child == (pid_t)-1) {
/* child process */
340 if (state->child == 0) {
344 if (ctdb->do_setsched) {
345 ctdb_restore_scheduler(ctdb);
/* presumably marks the write end close-on-exec so the scripts do not
 * inherit it - confirm against set_close_on_exec() */
347 set_close_on_exec(state->fd[1]);
348 rt = ctdb_event_script_v(ctdb, state->options);
/* loop until one write() transfers the whole status value; the loop body
 * is not visible here */
349 while ((ret = write(state->fd[1], &rt, sizeof(rt))) != sizeof(rt)) {
/* parent: from here on destroying the state sends SIGTERM to the child */
355 talloc_set_destructor(state, event_script_destructor);
/* both events are created with the state as their second argument */
359 event_add_fd(ctdb->ev, state, state->fd[0], EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
360 ctdb_event_script_handler, state);
362 if (!timeval_is_zero(&timeout)) {
363 event_add_timed(ctdb->ev, state, timeout, ctdb_event_script_timeout, state);
/* presumably the else branch: no timed event is armed for this run */
365 DEBUG(DEBUG_ERR, (__location__ " eventscript %s called with no timeout\n", state->options));
373 run the event script in the background, calling the callback when finished
/*
 * Variadic front end to ctdb_event_script_callback_v(): forwards ctdb,
 * timeout, mem_ctx, callback and private_data unchanged and passes the
 * variable arguments on as a va_list.
 *
 * fmt is a printf-style format string, so callers must not pass untrusted
 * text as fmt itself.
 *
 * NOTE(review): the mem_ctx and private_data parameter lines, the
 * va_start/va_end calls and the return statement are not visible here.
 */
376 int ctdb_event_script_callback(struct ctdb_context *ctdb,
377 struct timeval timeout,
379 void (*callback)(struct ctdb_context *, int, void *),
381 const char *fmt, ...)
387 ret = ctdb_event_script_callback_v(ctdb, timeout, mem_ctx, callback, private_data, fmt, ap);
/* Result slot used by the synchronous ctdb_event_script(). Its members are
 * not visible here; the file reads status.done and status.status. */
394 struct callback_status {
400 called when ctdb_event_script() finishes
/*
 * Completion callback that ctdb_event_script() passes to
 * ctdb_event_script_callback_v().
 *
 * status:       result reported for the script run.
 * private_data: the caller's struct callback_status.
 *
 * NOTE(review): the rest of the body is not visible here; presumably it
 * stores status and marks the run as done, since ctdb_event_script() waits
 * on status.done and returns status.status - confirm.
 */
402 static void event_script_callback(struct ctdb_context *ctdb, int status, void *private_data)
404 struct callback_status *s = (struct callback_status *)private_data;
410 run the event script, waiting for it to complete. Used when the caller doesn't want to
411 continue till the event script has finished.
/*
 * Synchronous variant: start the event scripts and pump the event loop
 * until they have finished.
 *
 * ctdb: daemon context; supplies the event loop.
 * fmt:  printf-style format (plus arguments) producing the option string.
 *
 * Returns status.status as filled in through event_script_callback().
 * NOTE(review): the return value of the start-up failure path is not
 * visible here.
 */
413 int ctdb_event_script(struct ctdb_context *ctdb, const char *fmt, ...)
/* temporary parent for the script state; freed on both visible paths */
417 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
418 struct callback_status status;
/* zero timeout: no timed event is armed, so the wait below is not bounded
 * by a timer */
421 ret = ctdb_event_script_callback_v(ctdb, timeval_zero(), tmp_ctx, event_script_callback, &status, fmt, ap);
/* NOTE(review): failure path; the guarding condition is not visible here */
425 talloc_free(tmp_ctx);
/* run the event loop until the callback marks the run as done or
 * event_loop_once() returns non-zero. NOTE(review): in the latter case
 * status.status may not have been set - confirm how status is initialised */
432 while (status.done == false && event_loop_once(ctdb->ev) == 0) /* noop */;
434 talloc_free(tmp_ctx);
436 return status.status;
/* State kept by ctdb_run_eventscripts() while a forced run is in flight. */
440 struct eventscript_callback_state {
/* the control request to answer once the run has finished */
441 struct ctdb_req_control *c;
445 called when a forced run of the eventscripts finishes
/*
 * Completion callback for the forced run started by
 * ctdb_run_eventscripts(). Re-enables monitoring and answers the pending
 * control request.
 *
 * status:       result of the script run.
 * private_data: the struct eventscript_callback_state holding the request.
 *               Its parameter declaration is on a line not visible here.
 */
447 static void run_eventscripts_callback(struct ctdb_context *ctdb, int status,
450 struct eventscript_callback_state *state =
451 talloc_get_type(private_data, struct eventscript_callback_state);
/* monitoring was disabled by ctdb_run_eventscripts() before the run */
453 ctdb_enable_monitoring(ctdb);
/* failure path: the reply carries the script status. NOTE(review): the
 * guarding condition is not visible here */
456 DEBUG(DEBUG_ERR,(__location__ " Failed to forcibly run eventscripts\n"));
457 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
462 /* the control succeeded */
463 ctdb_request_control_reply(ctdb, state->c, NULL, 0, NULL);
469 A control to force running of the eventscripts from the ctdb client tool
/*
 * Control handler that forces a run of the event scripts.
 *
 * ctdb:        daemon context.
 * c:           the control request; ownership is moved to the local state
 *              so it can be answered from run_eventscripts_callback().
 * indata:      indata.dptr is used as the option string for the scripts.
 * async_reply: not touched by the visible lines; per the comment at the
 *              end, the reply is sent asynchronously.
 *
 * NOTE(review): the return statements and the conditions guarding the
 * failure paths are on lines not visible here.
 */
471 int32_t ctdb_run_eventscripts(struct ctdb_context *ctdb,
472 struct ctdb_req_control *c,
473 TDB_DATA indata, bool *async_reply)
476 struct eventscript_callback_state *state;
478 /* kill off any previous invocations of forced eventscripts */
479 if (ctdb->eventscripts_ctx) {
480 talloc_free(ctdb->eventscripts_ctx);
482 ctdb->eventscripts_ctx = talloc_new(ctdb);
483 CTDB_NO_MEMORY(ctdb, ctdb->eventscripts_ctx);
485 state = talloc(ctdb->eventscripts_ctx, struct eventscript_callback_state);
486 CTDB_NO_MEMORY(ctdb, state);
/* take ownership of the request so it can be answered later */
488 state->c = talloc_steal(state, c);
/* NOTE(review): indata.dptr is printed with %s here and below; confirm it
 * is always NUL-terminated */
490 DEBUG(DEBUG_NOTICE,("Forced running of eventscripts with arguments %s\n", indata.dptr));
/* forced runs are rejected unless recovery mode is normal */
492 if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) {
493 DEBUG(DEBUG_ERR, (__location__ " Aborted running eventscript \"%s\" while in RECOVERY mode\n", indata.dptr));
/* monitoring stays disabled during the forced run; it is re-enabled in
 * run_eventscripts_callback() or on the failure path below */
497 ctdb_disable_monitoring(ctdb);
/* state serves both as talloc parent of the script run and as the
 * callback's private data.
 * NOTE(review): indata.dptr is passed in the position of the printf-style
 * fmt parameter of ctdb_event_script_callback(), so any '%' in the request
 * data is interpreted as a conversion by talloc_vasprintf(). Passing "%s"
 * as fmt with indata.dptr as its argument would avoid this. */
499 ret = ctdb_event_script_callback(ctdb,
500 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
501 state, run_eventscripts_callback, state,
502 (const char *)indata.dptr);
505 ctdb_enable_monitoring(ctdb);
506 DEBUG(DEBUG_ERR,(__location__ " Failed to run eventscripts with arguments %s\n", indata.dptr));
511 /* tell ctdb_control.c that we will be replying asynchronously */