LockWait congestion: queue requests for a key that already has an active lockwait child.

author Ronnie Sahlberg <ronniesahlberg@gmail.com>

Mon, 24 Jan 2011 00:42:50 +0000 (11:42 +1100)

committer Ronnie Sahlberg <ronniesahlberg@gmail.com>

Mon, 24 Jan 2011 01:20:14 +0000 (12:20 +1100)
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 24 Jan 2011 00:42:50 +0000 (11:42 +1100)
committer Ronnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 24 Jan 2011 01:20:14 +0000 (12:20 +1100)
diff --git a/include/ctdb_private.h b/include/ctdb_private.h

index 6e5c46365a991a4fdcc923b5ff3ce141a95eec1a..4dcf9a5bf9736d9f4b25b7fc24a07a97f7fa8049 100644 (file)
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -507,6 +507,7 @@ struct ctdb_db_context {
         struct ctdb_vacuum_handle *vacuum_handle;
         char *unhealthy_reason;
         int pending_requests;
+       struct lockwait_handle *lockwait_active;
         struct lockwait_handle *lockwait_overflow;
  };
  
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c

index bab49fe9ecdd60416e9c4fc69cacff4c36778a02..98606928b44b46f0ae882a8d5a3fe4cfe0029016 100644 (file)
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -70,6 +70,15 @@ static void do_overflow(struct ctdb_db_context *ctdb_db,
         }
  }
  
+static int lockwait_destructor(struct lockwait_handle *h)
+{
+       CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
+       kill(h->child, SIGKILL);
+       h->ctdb_db->pending_requests--;
+       DLIST_REMOVE(h->ctdb_db->lockwait_active, h);
+       return 0;
+}
+
  static void lockwait_handler(struct event_context *ev, struct fd_event *fde, 
                              uint16_t flags, void *private_data)
  {
@@ -77,7 +86,6 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
                                                      struct lockwait_handle);
         void (*callback)(void *) = h->callback;
         void *p = h->private_data;
-       pid_t child = h->child;
         TDB_DATA key = h->key;
         struct tdb_context *tdb = h->ctdb_db->ltdb->tdb;
         TALLOC_CTX *tmp_ctx = talloc_new(ev);
@@ -85,9 +93,7 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
         key.dptr = talloc_memdup(tmp_ctx, key.dptr, key.dsize);
         h->ctdb_db->pending_requests--;
  
-       talloc_set_destructor(h, NULL);
         CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "lockwait", lockwait_latency, h->start_time);
-       CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
  
         /* the handle needs to go away when the context is gone - when
            the handle goes away this implicitly closes the pipe, which
@@ -107,17 +113,9 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
         }
         tdb_chainlock_unmark(tdb, key);
  
-       kill(child, SIGKILL);
         talloc_free(tmp_ctx);
  }
  
-static int lockwait_destructor(struct lockwait_handle *h)
-{
-       CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
-       kill(h->child, SIGKILL);
-       h->ctdb_db->pending_requests--;
-       return 0;
-}
  
  static int overflow_lockwait_destructor(struct lockwait_handle *h)
  {
@@ -141,7 +139,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
                                       void (*callback)(void *private_data),
                                       void *private_data)
  {
-       struct lockwait_handle *result;
+       struct lockwait_handle *result, *i;
         int ret;
         pid_t parent = getpid();
  
@@ -159,6 +157,18 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
         result->ctdb_db = ctdb_db;
         result->key = key;
  
+       /* If we already have an active lockwait child for this key, then put
+          this request on the overflow queue straight away
+        */
+       for (i = ctdb_db->lockwait_active; i; i = i->next) {
+               if (key.dsize == i->key.dsize
+                   && memcmp(key.dptr, i->key.dptr, key.dsize) == 0) {
+                       DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL);
+                       talloc_set_destructor(result, overflow_lockwait_destructor);
+                       return result;
+               }
+       }
+
         /* Don't fire off too many children at once! */
         if (ctdb_db->pending_requests > 200) {
                 DLIST_ADD_END(ctdb_db->lockwait_overflow, result, NULL);
@@ -202,6 +212,9 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
         close(result->fd[1]);
         set_close_on_exec(result->fd[0]);
  
+       /* This is an active lockwait child process */
+       DLIST_ADD_END(ctdb_db->lockwait_active, result, NULL);
+
         DEBUG(DEBUG_DEBUG, (__location__ " Created PIPE FD:%d to child lockwait process\n", result->fd[0]));
  
         ctdb_db->pending_requests++;
diff --git a/server/ctdbd.c b/server/ctdbd.c

index bddd658251f0f17dc355f564ab9a1e122b07c3fd..9eaba1d03808f6bb43eef71aab0f2758eb1d1006 100644 (file)
--- a/server/ctdbd.c
+++ b/server/ctdbd.c
@@ -43,6 +43,7 @@ static struct {
         const char *single_public_ip;
         const char *node_ip;
         int         valgrinding;
+       int         nosetsched;
         int         use_syslog;
         int         start_as_disabled;
         int         start_as_stopped;
@@ -133,6 +134,7 @@ int main(int argc, const char *argv[])
                 { "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
                 { "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
                 { "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
+               { "nosetsched", 0, POPT_ARG_NONE, &options.nosetsched, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
                 { "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
                 { "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
                 { "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
@@ -315,7 +317,11 @@ int main(int argc, const char *argv[])
         }
  
         ctdb->valgrinding = options.valgrinding;
-       ctdb->do_setsched = !ctdb->valgrinding;
+       if (options.valgrinding || options.nosetsched) {
+               ctdb->do_setsched = 0;
+       } else {
+               ctdb->do_setsched = 1;
+       }
  
         if (options.max_persistent_check_errors < 0) {
                 ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
author	Ronnie Sahlberg <ronniesahlberg@gmail.com>
	Mon, 24 Jan 2011 00:42:50 +0000 (11:42 +1100)
committer	Ronnie Sahlberg <ronniesahlberg@gmail.com>
	Mon, 24 Jan 2011 01:20:14 +0000 (12:20 +1100)
include/ctdb_private.h		patch \| blob \| history
server/ctdb_lockwait.c		patch \| blob \| history
server/ctdbd.c		patch \| blob \| history