From: Ronnie Sahlberg Date: Mon, 24 Jan 2011 00:42:50 +0000 (+1100) Subject: LockWait congestion. X-Git-Url: http://git.samba.org/?p=sahlberg%2Fctdb.git;a=commitdiff_plain;h=8a143a97a313a2e50fb409f6382c759dbd14cdcd LockWait congestion. Add a dlist to track all active lockwait child processes. Every time we create a new lockwait handle, check if there is already an active lockwait process for this database/key and if so, send the new request straight to the overflow queue. This means we will only have one active lockwait child process for a certain key, even if there were thousands of fetch-lock requests for this key. When the lockwait processing finishes for the original request, the processing in do_overflow() will automagically process all remaining keys as well. Add back a --nosetsched argument to make it easier to run under gdb --- diff --git a/include/ctdb_private.h b/include/ctdb_private.h index 6e5c4636..4dcf9a5b 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -507,6 +507,7 @@ struct ctdb_db_context { struct ctdb_vacuum_handle *vacuum_handle; char *unhealthy_reason; int pending_requests; + struct lockwait_handle *lockwait_active; struct lockwait_handle *lockwait_overflow; }; diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c index bab49fe9..98606928 100644 --- a/server/ctdb_lockwait.c +++ b/server/ctdb_lockwait.c @@ -70,6 +70,15 @@ static void do_overflow(struct ctdb_db_context *ctdb_db, } } +static int lockwait_destructor(struct lockwait_handle *h) +{ + CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls); + kill(h->child, SIGKILL); + h->ctdb_db->pending_requests--; + DLIST_REMOVE(h->ctdb_db->lockwait_active, h); + return 0; +} + static void lockwait_handler(struct event_context *ev, struct fd_event *fde, uint16_t flags, void *private_data) { @@ -77,7 +86,6 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde, struct lockwait_handle); void (*callback)(void *) = h->callback; void *p = h->private_data; - 
pid_t child = h->child; TDB_DATA key = h->key; struct tdb_context *tdb = h->ctdb_db->ltdb->tdb; TALLOC_CTX *tmp_ctx = talloc_new(ev); @@ -85,9 +93,7 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde, key.dptr = talloc_memdup(tmp_ctx, key.dptr, key.dsize); h->ctdb_db->pending_requests--; - talloc_set_destructor(h, NULL); CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "lockwait", lockwait_latency, h->start_time); - CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls); /* the handle needs to go away when the context is gone - when the handle goes away this implicitly closes the pipe, which @@ -107,17 +113,9 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde, } tdb_chainlock_unmark(tdb, key); - kill(child, SIGKILL); talloc_free(tmp_ctx); } -static int lockwait_destructor(struct lockwait_handle *h) -{ - CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls); - kill(h->child, SIGKILL); - h->ctdb_db->pending_requests--; - return 0; -} static int overflow_lockwait_destructor(struct lockwait_handle *h) { @@ -141,7 +139,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db, void (*callback)(void *private_data), void *private_data) { - struct lockwait_handle *result; + struct lockwait_handle *result, *i; int ret; pid_t parent = getpid(); @@ -159,6 +157,18 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db, result->ctdb_db = ctdb_db; result->key = key; + /* If we already have a lockwait child for this request, then put this + request on the overflow queue straight away + */ + for (i = ctdb_db->lockwait_active; i; i = i->next) { + if (key.dsize == i->key.dsize + && memcmp(key.dptr, i->key.dptr, key.dsize) == 0) { + DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL); + talloc_set_destructor(result, overflow_lockwait_destructor); + return result; + } + } + /* Don't fire off too many children at once! 
*/ if (ctdb_db->pending_requests > 200) { DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL); @@ -202,6 +212,9 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db, close(result->fd[1]); set_close_on_exec(result->fd[0]); + /* This is an active lockwait child process */ + DLIST_ADD_END(ctdb_db->lockwait_active, result, NULL); + DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child lockwait process\n", result->fd[0])); ctdb_db->pending_requests++; diff --git a/server/ctdbd.c b/server/ctdbd.c index bddd6582..9eaba1d0 100644 --- a/server/ctdbd.c +++ b/server/ctdbd.c @@ -43,6 +43,7 @@ static struct { const char *single_public_ip; const char *node_ip; int valgrinding; + int nosetsched; int use_syslog; int start_as_disabled; int start_as_stopped; @@ -133,6 +134,7 @@ int main(int argc, const char *argv[]) { "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL }, { "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" }, { "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL }, + { "nosetsched", 0, POPT_ARG_NONE, &options.nosetsched, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL }, { "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL }, { "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL }, { "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL }, @@ -315,7 +317,11 @@ int main(int argc, const char *argv[]) } ctdb->valgrinding = options.valgrinding; - ctdb->do_setsched = !ctdb->valgrinding; + if (options.valgrinding || options.nosetsched) { + ctdb->do_setsched = 0; + } else { + ctdb->do_setsched = 1; + } if (options.max_persistent_check_errors < 0) { ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;