CTDB_COMMON_OBJ = common/ctdb_io.o common/ctdb_util.o \
common/ctdb_ltdb.o common/ctdb_message.o common/cmdline.o \
lib/util/debug.o common/rb_tree.o @CTDB_SYSTEM_OBJ@ common/system_common.o \
- common/ctdb_logging.c
+ common/ctdb_logging.c common/ctdb_fork.o
CTDB_LIB_OBJ = libctdb/ctdb.o libctdb/io_elem.o libctdb/local_tdb.o \
libctdb/messages.o libctdb/sync.o libctdb/control.o \
--- /dev/null
+/*
+ functions to track and manage processes
+
+ Copyright (C) Ronnie Sahlberg 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "system/wait.h"
+#include "../include/ctdb_client.h"
+#include "../include/ctdb_private.h"
+#include "../common/rb_tree.h"
+
+/*
+ * Fork a child process and drop the realtime scheduler for the child.
+ *
+ * Child side: calls ctdb_restore_scheduler() when ctdb->do_setsched is
+ * set, clears ctdb->can_send_controls and returns 0.
+ *
+ * Parent side: returns the pid of the new child.  Only when the caller
+ * is the main daemon (getpid() == ctdb->ctdbd_pid) is the child also
+ * recorded in ctdb->child_processes, keyed by pid, with a
+ * "process:<pid>" label as the stored data.
+ *
+ * Returns -1 if fork() fails.
+ */
+pid_t ctdb_fork(struct ctdb_context *ctdb)
+{
+	pid_t pid;
+	char *process;
+
+	pid = fork();
+	if (pid == -1) {
+		return -1;
+	}
+	if (pid == 0) {
+		/* child */
+		if (ctdb->do_setsched) {
+			ctdb_restore_scheduler(ctdb);
+		}
+		ctdb->can_send_controls = false;
+		return 0;
+	}
+
+	/* parent: only the main daemon keeps track of its children */
+	if (getpid() != ctdb->ctdbd_pid) {
+		return pid;
+	}
+
+	process = talloc_asprintf(ctdb->child_processes, "process:%d", (int)pid);
+	if (process == NULL) {
+		/*
+		 * The child is already running, so still hand its pid back,
+		 * but do not insert a NULL label into the tree.  The child
+		 * stays untracked: ctdb_kill() will not find it.
+		 */
+		DEBUG(DEBUG_ERR, (__location__ " Out of memory, unable to track child pid:%d\n", (int)pid));
+		return pid;
+	}
+	trbt_insert32(ctdb->child_processes, pid, process);
+
+	return pid;
+}
+
+
+
+/*
+ * SIGCHLD handler: reap, without blocking, every child that has changed
+ * state.
+ *
+ * private_data is the struct ctdb_context the handler was registered
+ * with; the remaining tevent arguments are unused.
+ *
+ * The loop ends when waitpid() reports nothing more to reap (0) or
+ * fails (-1, which is logged).  In the main daemon each reaped pid is
+ * looked up in ctdb->child_processes; a known pid has its label logged
+ * and freed, an unknown pid is reported as an error.  In any other
+ * process the child is reaped and nothing else is done.
+ */
+static void ctdb_sigchld_handler(struct tevent_context *ev,
+	struct tevent_signal *te, int signum, int count,
+	void *dont_care,
+	void *private_data)
+{
+	struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
+	int status;
+	pid_t pid = -1;
+
+	while (pid != 0) {
+		pid = waitpid(-1, &status, WNOHANG);
+		if (pid == -1) {
+			DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
+			return;
+		}
+		if (pid > 0) {
+			char *process;
+
+			/* only the main daemon owns the tracking tree */
+			if (getpid() != ctdb->ctdbd_pid) {
+				continue;
+			}
+
+			process = trbt_lookup32(ctdb->child_processes, pid);
+			if (process == NULL) {
+				DEBUG(DEBUG_ERR,("Got SIGCHLD from pid:%d we didn not spawn with ctdb_fork\n", (int)pid));
+				/*
+				 * Nothing is tracked for this pid: there is no
+				 * label to free, and NULL must not be handed
+				 * to the %s conversion below.
+				 */
+				continue;
+			}
+
+			DEBUG(DEBUG_DEBUG, ("SIGCHLD from %d %s\n", (int)pid, process));
+			/*
+			 * Release the label stored by ctdb_fork().
+			 * NOTE(review): presumably this also drops the tree
+			 * entry for the pid - confirm against rb_tree.
+			 */
+			talloc_free(process);
+		}
+	}
+}
+
+
+/*
+ * Set up child-process tracking for this context.
+ *
+ * Creates the tree stored in ctdb->child_processes and registers
+ * ctdb_sigchld_handler() for SIGCHLD on ctdb->ev, with ctdb as both the
+ * talloc parent and the handler's private data.
+ *
+ * Returns whatever tevent_add_signal() returns.
+ */
+struct tevent_signal *
+ctdb_init_sigchld(struct ctdb_context *ctdb)
+{
+	ctdb->child_processes = trbt_create(ctdb, 0);
+
+	return tevent_add_signal(ctdb->ev, ctdb, SIGCHLD, 0, ctdb_sigchld_handler, ctdb);
+}
+
+/*
+ * kill() wrapper that, in the main daemon, only signals tracked pids.
+ *
+ * The call is passed straight to kill() when signum is 0 (an existence
+ * probe) or when the caller is not the main daemon
+ * (getpid() != ctdb->ctdbd_pid).
+ *
+ * Otherwise the pid must be present in ctdb->child_processes.  If it
+ * is, the result of kill() is returned; if it is not, an error is
+ * logged, no signal is sent and 0 is returned.
+ *
+ * NOTE(review): only ctdb_fork() adds pids to child_processes, so a
+ * pid obtained any other way (e.g. a connected client's pid) would be
+ * refused here when called from the main daemon - confirm that every
+ * caller passes a pid created by ctdb_fork().
+ */
+int
+ctdb_kill(struct ctdb_context *ctdb, pid_t pid, int signum)
+{
+	/* probes and callers outside the main daemon bypass the tracking check */
+	if (signum == 0 || getpid() != ctdb->ctdbd_pid) {
+		return kill(pid, signum);
+	}
+
+	if (trbt_lookup32(ctdb->child_processes, pid) != NULL) {
+		return kill(pid, signum);
+	}
+
+	DEBUG(DEBUG_ERR,("ctdb_kill: trying to kill(%d, %d) a process that does not exist\n", pid, signum));
+	return 0;
+}
#endif
}
-/*
- * This function forks a child process and drops the realtime
- * scheduler for the child process.
- */
-pid_t ctdb_fork(struct ctdb_context *ctdb)
-{
- pid_t pid;
-
- pid = fork();
- if (pid == 0) {
- if (ctdb->do_setsched) {
- ctdb_restore_scheduler(ctdb);
- }
- ctdb->can_send_controls = false;
- }
- return pid;
-}
-
void set_nonblocking(int fd)
{
unsigned v;
struct ctdb_reloadips_handle *reload_ips;
const char *public_addresses_file;
+ struct trbt_tree *child_processes;
};
struct ctdb_db_context {
bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
void ctdb_set_scheduler(struct ctdb_context *ctdb);
void ctdb_restore_scheduler(struct ctdb_context *ctdb);
+
+struct tevent_signal *ctdb_init_sigchld(struct ctdb_context *ctdb);
pid_t ctdb_fork(struct ctdb_context *ctdb);
+int ctdb_kill(struct ctdb_context *ctdb, pid_t pid, int signum);
+
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA indata,
if (rc->fd[1] != -1) {
close(rc->fd[1]);
}
- kill(rc->child, SIGKILL);
+ ctdb_kill(rc->ctdb, rc->child, SIGKILL);
DLIST_REMOVE(rc->ctdb_db->revokechild_active, rc);
return 0;
child_finished:
write(rc->fd[1], &c, 1);
/* make sure we die when our parent dies */
- while (kill(parent, 0) == 0 || errno != ESRCH) {
+ while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
}
_exit(0);
return -1;
}
-static void sig_child_handler(struct event_context *ev,
- struct signal_event *se, int signum, int count,
- void *dont_care,
- void *private_data)
-{
-// struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
- int status;
- pid_t pid = -1;
-
- while (pid != 0) {
- pid = waitpid(-1, &status, WNOHANG);
- if (pid == -1) {
- DEBUG(DEBUG_ERR, (__location__ " waitpid() returned error. errno:%d\n", errno));
- return;
- }
- if (pid > 0) {
- DEBUG(DEBUG_DEBUG, ("SIGCHLD from %d\n", (int)pid));
- }
- }
-}
-
static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
void *private_data)
{
int res, ret = -1;
struct fd_event *fde;
const char *domain_socket_name;
- struct signal_event *se;
/* get rid of any old sockets */
unlink(ctdb->daemon.name);
ctdbd_pid = getpid();
ctdb->ctdbd_pid = ctdbd_pid;
-
DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid));
if (ctdb->do_setsched) {
exit(1);
}
+ /* set up a handler to pick up sigchld */
+ if (ctdb_init_sigchld(ctdb) == NULL) {
+ DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
+ exit(1);
+ }
+
ctdb_set_child_logging(ctdb);
/* initialize statistics collection */
/* start the transport going */
ctdb_start_transport(ctdb);
- /* set up a handler to pick up sigchld */
- se = event_add_signal(ctdb->ev, ctdb,
- SIGCHLD, 0,
- sig_child_handler,
- ctdb);
- if (se == NULL) {
- DEBUG(DEBUG_CRIT,("Failed to set up signal handler for SIGCHLD\n"));
- exit(1);
- }
-
ret = ctdb_event_script_callback(ctdb,
ctdb,
ctdb_setup_event_callback,
ctdb->freeze_mode[h->priority] = CTDB_FREEZE_NONE;
ctdb->freeze_handles[h->priority] = NULL;
- kill(h->child, SIGKILL);
+ ctdb_kill(h->ctdb, h->child, SIGKILL);
return 0;
}
return NULL;
}
- h->child = fork();
+ h->child = ctdb_fork(ctdb);
if (h->child == -1) {
DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
talloc_free(h);
while (1) {
sleep(1);
- if (kill(ctdb->ctdbd_pid, 0) != 0) {
+ if (ctdb_kill(ctdb, ctdb->ctdbd_pid, 0) != 0) {
DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
_exit(0);
static int lockwait_destructor(struct lockwait_handle *h)
{
CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
- kill(h->child, SIGKILL);
+ ctdb_kill(h->ctdb, h->child, SIGKILL);
h->ctdb_db->pending_requests--;
DLIST_REMOVE(h->ctdb_db->lockwait_active, h);
return 0;
tdb_chainlock(ctdb_db->ltdb->tdb, key);
write(result->fd[1], &c, 1);
/* make sure we die when our parent dies */
- while (kill(parent, 0) == 0 || errno != ESRCH) {
+ while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
}
_exit(0);
if (state->fd[1] != -1) {
state->fd[1] = -1;
}
- kill(state->child, SIGKILL);
+ ctdb_kill(state->ctdb, state->child, SIGKILL);
return 0;
}
return -1;
}
- state->child = fork();
+ state->child = ctdb_fork(ctdb);
if (state->child == (pid_t)-1) {
close(state->fd[0]);
close(state->fd[1]);
write(state->fd[1], &cc, 1);
/* make sure we die when our parent dies */
- while (kill(parent, 0) == 0 || errno != ESRCH) {
+ while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
write(state->fd[1], &cc, 1);
}
close(state->fd[1]);
state->fd[1] = -1;
}
- kill(state->child, SIGKILL);
+ ctdb_kill(ctdb, state->child, SIGKILL);
return 0;
}
write(state->fd[1], &cc, 1);
/* make sure we die when our parent dies */
- while (kill(parent, 0) == 0 || errno != ESRCH) {
+ while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
write(state->fd[1], &cc, 1);
}
/* verify that the main daemon is still running */
- if (kill(ctdb->ctdbd_pid, 0) != 0) {
+ if (ctdb_kill(ctdb, ctdb->ctdbd_pid, 0) != 0) {
DEBUG(DEBUG_CRIT,("CTDB daemon is no longer available. Shutting down recovery daemon\n"));
exit(-1);
}
{
struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
- if (kill(ctdb->recoverd_pid, 0) != 0) {
+ if (ctdb_kill(ctdb, ctdb->recoverd_pid, 0) != 0) {
DEBUG(DEBUG_ERR,("Recovery daemon (pid:%d) is no longer running. Trying to restart recovery daemon.\n", (int)ctdb->recoverd_pid));
event_add_timed(ctdb->ev, ctdb, timeval_zero(),
ctdb->ctdbd_pid = getpid();
- ctdb->recoverd_pid = fork();
+ ctdb->recoverd_pid = ctdb_fork(ctdb);
if (ctdb->recoverd_pid == -1) {
return -1;
}
}
DEBUG(DEBUG_NOTICE,("Shutting down recovery daemon\n"));
- kill(ctdb->recoverd_pid, SIGTERM);
+ ctdb_kill(ctdb, ctdb->recoverd_pid, SIGTERM);
}
static void ctdb_restart_recd(struct event_context *ev, struct timed_event *te,
(unsigned)client->pid,
ctdb_addr_to_str(addr),
ip->client_id));
- kill(client->pid, SIGKILL);
+ ctdb_kill(ctdb, client->pid, SIGKILL);
}
}
}
ctdb_request_control_reply(h->ctdb, h->c, NULL, h->status, NULL);
h->c = NULL;
}
- kill(h->child, SIGKILL);
+ ctdb_kill(h->ctdb, h->child, SIGKILL);
return 0;
}
write(h->fd[1], &res, 1);
/* make sure we die when our parent dies */
- while (kill(parent, 0) == 0 || errno != ESRCH) {
+ while (ctdb_kill(ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
}
_exit(0);
static int traverse_local_destructor(struct ctdb_traverse_local_handle *h)
{
DLIST_REMOVE(h->ctdb_db->traverse, h);
- kill(h->child, SIGKILL);
+ ctdb_kill(h->ctdb_db->ctdb, h->child, SIGKILL);
return 0;
}
static int childwrite_destructor(struct childwrite_handle *h)
{
CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
- kill(h->child, SIGKILL);
+ ctdb_kill(h->ctdb, h->child, SIGKILL);
return 0;
}
callback(c, p);
- kill(child, SIGKILL);
+ ctdb_kill(h->ctdb, child, SIGKILL);
talloc_free(tmp_ctx);
}
write(result->fd[1], &c, 1);
/* make sure we die when our parent dies */
- while (kill(parent, 0) == 0 || errno != ESRCH) {
+ while (ctdb_kill(ctdb_db->ctdb, parent, 0) == 0 || errno != ESRCH) {
sleep(5);
}
_exit(0);
DEBUG(DEBUG_INFO,("Vacuuming took %.3f seconds for database %s\n", l, ctdb_db->db_name));
if (child_ctx->child_pid != -1) {
- kill(child_ctx->child_pid, SIGKILL);
+ ctdb_kill(ctdb, child_ctx->child_pid, SIGKILL);
} else {
/* Bump the number of successful fast-path runs. */
child_ctx->vacuum_handle->fast_path_count++;
/* valgrind gets overloaded if we run next script as it's still doing
* post-execution analysis, so kill finished child here. */
if (ctdb->valgrinding) {
- kill(state->child, SIGKILL);
+ ctdb_kill(ctdb, state->child, SIGKILL);
}
state->child = 0;
if (pid == 0) {
system(buf);
/* Now we can kill the child */
- kill(state->child, SIGTERM);
+ ctdb_kill(state->ctdb, state->child, SIGTERM);
exit(0);
}
if (pid == -1) {
if (state->child) {
DEBUG(DEBUG_ERR,(__location__ " Sending SIGTERM to child pid:%d\n", state->child));
- if (kill(state->child, SIGTERM) != 0) {
+ if (ctdb_kill(state->ctdb, state->child, SIGTERM) != 0) {
DEBUG(DEBUG_ERR,("Failed to kill child process for eventscript, errno %s(%d)\n", strerror(errno), errno));
}
}
#include "common/rb_tree.c"
#include "common/system_common.c"
#include "common/ctdb_logging.c"
+#include "common/ctdb_fork.c"
/* CTDB_CLIENT_OBJ */
#include "client/ctdb_client.c"
#include "common/rb_tree.c"
#include "common/system_common.c"
#include "common/ctdb_logging.c"
+#include "common/ctdb_fork.c"
/* CTDB_SERVER_OBJ */
#include "server/ctdb_daemon.c"