From 3a56a16b06cf6d1cce613ec29f5ea46630902072 Mon Sep 17 00:00:00 2001 From: Amitay Isaacs Date: Tue, 29 Nov 2016 17:20:45 +1100 Subject: [PATCH] ctdb-locking: Explicitly unlock record/db in lock helper BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469 Instead of killing lock helper processes with SIGKILL, send SIGTERM so lock helper processes can explicitly unlock record/db. Signed-off-by: Amitay Isaacs Reviewed-by: Martin Schwenke --- ctdb/server/ctdb_lock.c | 6 +- ctdb/server/ctdb_lock_helper.c | 177 +++++++++++++++++++++++++++++---- ctdb/wscript | 3 +- 3 files changed, 162 insertions(+), 24 deletions(-) diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c index 0203369c3c2..b2c6d1507e5 100644 --- a/ctdb/server/ctdb_lock.c +++ b/ctdb/server/ctdb_lock.c @@ -192,7 +192,7 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx) lock_ctx->request->lctx = NULL; } if (lock_ctx->child > 0) { - ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGKILL); + ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM); if (lock_ctx->type == LOCK_RECORD) { DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx); } else { @@ -683,7 +683,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) ctdb_lock_timeout_handler, (void *)lock_ctx); if (lock_ctx->ttimer == NULL) { - ctdb_kill(ctdb, lock_ctx->child, SIGKILL); + ctdb_kill(ctdb, lock_ctx->child, SIGTERM); lock_ctx->child = -1; close(lock_ctx->fd[0]); return; @@ -698,7 +698,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb) (void *)lock_ctx); if (lock_ctx->tfd == NULL) { TALLOC_FREE(lock_ctx->ttimer); - ctdb_kill(ctdb, lock_ctx->child, SIGKILL); + ctdb_kill(ctdb, lock_ctx->child, SIGTERM); lock_ctx->child = -1; close(lock_ctx->fd[0]); return; diff --git a/ctdb/server/ctdb_lock_helper.c b/ctdb/server/ctdb_lock_helper.c index 6627412f38e..8b9f5aa3bd8 100644 --- a/ctdb/server/ctdb_lock_helper.c +++ b/ctdb/server/ctdb_lock_helper.c @@ -20,11 +20,14 @@ #include "replace.h" #include "system/filesys.h" #include "system/network.h" +#include "system/wait.h" #include +#include #include #include "lib/util/sys_rw.h" +#include "lib/util/tevent_unix.h" #include "protocol/protocol.h" @@ -33,6 +36,11 @@ static char *progname = NULL; static bool realtime = true; +struct lock_state { + struct tdb_context *tdb; + TDB_DATA key; +}; + static void set_priority(void) { const char *ptr; @@ -93,10 +101,9 @@ static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx, return buffer; } -static int lock_record(const char *dbpath, const char *dbflags, const char *dbkey) +static int lock_record(const char *dbpath, const char *dbflags, + const char *dbkey, struct lock_state *state) { - TDB_DATA key; - struct tdb_context *tdb; int tdb_flags; /* No error checking since CTDB always passes sane values */ @@ -104,23 +111,24 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke /* Convert hex key to key */ if (strcmp(dbkey, "NULL") == 0) { - key.dptr = NULL; - key.dsize = 0; + state->key.dptr = NULL; + state->key.dsize = 0; } else { - key.dptr = hex_decode_talloc(NULL, dbkey, &key.dsize); + state->key.dptr = hex_decode_talloc(NULL, dbkey, + &state->key.dsize); } - tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600); - if (tdb == NULL) { + state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600); + if (state->tdb == NULL) { fprintf(stderr, "locking: Error opening database %s\n", dbpath); return 1; } set_priority(); - if (tdb_chainlock(tdb, key) < 0) { + if (tdb_chainlock(state->tdb, state->key) < 0) { fprintf(stderr, "locking: Error getting record lock (%s)\n", - tdb_errorstr(tdb)); + tdb_errorstr(state->tdb)); return 1; } @@ -130,26 +138,25 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke } - -static int lock_db(const char *dbpath, const char *dbflags) +static int lock_db(const char *dbpath, const char *dbflags, + struct lock_state *state) { - struct tdb_context *tdb; int tdb_flags; /* No error checking since CTDB always passes sane values */ tdb_flags = strtol(dbflags, NULL, 0); - tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600); - if (tdb == NULL) { + state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600); + if (state->tdb == NULL) { fprintf(stderr, "locking: Error opening database %s\n", dbpath); return 1; } set_priority(); - if (tdb_lockall(tdb) < 0) { + if (tdb_lockall(state->tdb) < 0) { fprintf(stderr, "locking: Error getting db lock (%s)\n", - tdb_errorstr(tdb)); + tdb_errorstr(state->tdb)); return 1; } @@ -158,13 +165,114 @@ static int lock_db(const char *dbpath, const char *dbflags) return 0; } +struct wait_for_parent_state { + struct tevent_context *ev; + pid_t ppid; +}; + +static void wait_for_parent_check(struct tevent_req *subreq); + +static struct tevent_req *wait_for_parent_send(TALLOC_CTX *mem_ctx, + struct tevent_context *ev, + pid_t ppid) +{ + struct tevent_req *req, *subreq; + struct wait_for_parent_state *state; + + req = tevent_req_create(mem_ctx, &state, struct wait_for_parent_state); + if (req == NULL) { + return NULL; + } + + state->ev = ev; + state->ppid = ppid; + + if (ppid == 1) { + tevent_req_done(req); + return tevent_req_post(req, ev); + } + + subreq = tevent_wakeup_send(state, ev, + tevent_timeval_current_ofs(5,0)); + if (tevent_req_nomem(subreq, req)) { + return tevent_req_post(req, ev); + } + tevent_req_set_callback(subreq, wait_for_parent_check, req); + + return req; +} + +static void wait_for_parent_check(struct tevent_req *subreq) +{ + struct tevent_req *req = tevent_req_callback_data( + subreq, struct tevent_req); + struct wait_for_parent_state *state = tevent_req_data( + req, struct wait_for_parent_state); + bool status; + + status = tevent_wakeup_recv(subreq); + TALLOC_FREE(subreq); + if (! status) { + /* Ignore error */ + fprintf(stderr, "locking: tevent_wakeup_recv() failed\n"); + } + + if (kill(state->ppid, 0) == -1 && errno == ESRCH) { + tevent_req_done(req); + return; + } + + subreq = tevent_wakeup_send(state, state->ev, + tevent_timeval_current_ofs(5,0)); + if (tevent_req_nomem(subreq, req)) { + return; + } + tevent_req_set_callback(subreq, wait_for_parent_check, req); +} + +static bool wait_for_parent_recv(struct tevent_req *req) +{ + if (tevent_req_is_unix_error(req, NULL)) { + return false; + } + + return true; +} + +static void cleanup(struct lock_state *state) +{ + if (state->tdb != NULL) { + if (state->key.dsize == 0) { + tdb_unlockall(state->tdb); + } else { + tdb_chainunlock(state->tdb, state->key); + } + tdb_close(state->tdb); + } +} + +static void signal_handler(struct tevent_context *ev, + struct tevent_signal *se, + int signum, int count, void *siginfo, + void *private_data) +{ + struct lock_state *state = (struct lock_state *)private_data; + + cleanup(state); + exit(0); +} int main(int argc, char *argv[]) { + struct tevent_context *ev; + struct tevent_signal *se; + struct tevent_req *req; + struct lock_state state = { 0 }; int write_fd; char result = 0; int ppid; const char *lock_type; + bool status; reset_scheduler(); @@ -179,6 +287,20 @@ int main(int argc, char *argv[]) write_fd = atoi(argv[2]); lock_type = argv[3]; + ev = tevent_context_init(NULL); + if (ev == NULL) { + fprintf(stderr, "locking: tevent_context_init() failed\n"); + exit(1); + } + + se = tevent_add_signal(ev, ev, SIGTERM, 0, + signal_handler, &state); + if (se == NULL) { + fprintf(stderr, "locking: tevent_add_signal() failed\n"); + talloc_free(ev); + exit(1); + } + if (strcmp(lock_type, "RECORD") == 0) { if (argc != 7) { fprintf(stderr, @@ -187,7 +309,7 @@ int main(int argc, char *argv[]) usage(); exit(1); } - result = lock_record(argv[4], argv[5], argv[6]); + result = lock_record(argv[4], argv[5], argv[6], &state); } else if (strcmp(lock_type, "DB") == 0) { if (argc != 6) { @@ -197,7 +319,7 @@ int main(int argc, char *argv[]) usage(); exit(1); } - result = lock_db(argv[4], argv[5]); + result = lock_db(argv[4], argv[5], &state); } else { fprintf(stderr, "locking: Invalid lock-type '%s'\n", lock_type); @@ -207,6 +329,21 @@ int main(int argc, char *argv[]) send_result(write_fd, result); - ctdb_wait_for_process_to_exit(ppid); + req = wait_for_parent_send(ev, ev, ppid); + if (req == NULL) { + fprintf(stderr, "locking: wait_for_parent_send() failed\n"); + cleanup(&state); + exit(1); + } + + tevent_req_poll(req, ev); + + status = wait_for_parent_recv(req); + if (! status) { + fprintf(stderr, "locking: wait_for_parent_recv() failed\n"); + } + + talloc_free(ev); + cleanup(&state); return 0; } diff --git a/ctdb/wscript b/ctdb/wscript index 4bd7d6692c9..6f633e27609 100644 --- a/ctdb/wscript +++ b/ctdb/wscript @@ -502,7 +502,8 @@ def build(bld): bld.SAMBA_BINARY('ctdb_lock_helper', source='server/ctdb_lock_helper.c', - deps='samba-util sys_rw ctdb-system talloc tdb', + deps='''samba-util sys_rw ctdb-system tevent-util + talloc tevent tdb''', includes='include', install_path='${CTDB_HELPER_BINDIR}') -- 2.34.1