improve timing issue detections
authorChristian Ambach <christian.ambach@de.ibm.com>
Fri, 14 Jan 2011 12:55:28 +0000 (13:55 +0100)
committerRonnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 17 Jan 2011 00:50:41 +0000 (11:50 +1100)
the original "Time jumped" messages are too coarse to interpret
exactly what was going wrong inside of CTDB.

This patch removes the original logs and adds two other logs that
differentiate between the time it took to work on an event and
the time it took to get the next event.

lib/tevent/tevent.c
lib/tevent/tevent_epoll.c
lib/tevent/tevent_select.c
lib/tevent/tevent_standard.c
lib/tevent/tevent_util.c
lib/tevent/tevent_util.h

index 2f6e591baad7b4f2a9466065e126430972f08b46..88ae01c40ae4bca5b3dba7ad280f2a649c27145f 100644 (file)
@@ -590,8 +590,6 @@ extern pid_t ctdbd_pid;
 int tevent_common_loop_wait(struct tevent_context *ev,
                            const char *location)
 {
-       static time_t t=0;
-       time_t new_t;
 
        /*
         * loop as long as we have events pending
@@ -608,19 +606,6 @@ int tevent_common_loop_wait(struct tevent_context *ev,
                                     ret, strerror(errno));
                        return ret;
                }
-               if (getpid() == ctdbd_pid) {
-                       new_t=time(NULL);
-                       if (t != 0) {
-                               if (t > new_t) {
-                                       tevent_debug(ev, TEVENT_DEBUG_FATAL, __location__ " ERROR Time skipped backward by %d seconds\n", (int)(t-new_t));
-                               }
-                               /* We assume here that we get at least one event every 3 seconds */
-                               if (new_t > (t+3)) {
-                                       tevent_debug(ev, TEVENT_DEBUG_FATAL, __location__ " ERROR Time jumped forward by %d seconds\n", (int)(new_t-t));
-                               }
-                       }
-                       t=new_t;
-               }
        }
 
        tevent_debug(ev, TEVENT_DEBUG_WARNING,
index 3abb0e63e59410bfb22e8220017db737c92daf79..e443f9fa6c20fa1e45fb32b22d846b2150c219a7 100644 (file)
@@ -250,7 +250,9 @@ static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval
                return 0;
        }
 
+       tevent_before_wait(epoll_ev->ev);
        ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
+       tevent_after_wait(epoll_ev->ev);
 
        if (ret == -1 && errno == EINTR && epoll_ev->ev->signal_events) {
                if (tevent_common_check_signal(epoll_ev->ev)) {
index 128a7d6b5de464cd172eb6b30e6b6ddcc4a218f7..f25ce05a25b4bd85b6c55106412c9d0ceec5f584 100644 (file)
@@ -157,7 +157,9 @@ static int select_event_loop_select(struct select_event_context *select_ev, stru
                return 0;
        }
 
+       tevent_before_wait(select_ev->ev);
        selrtn = select(select_ev->maxfd+1, &r_fds, &w_fds, NULL, tvalp);
+       tevent_after_wait(select_ev->ev);
 
        if (selrtn == -1 && errno == EINTR &&
            select_ev->ev->signal_events) {
index 3f2d6d5b280f0aef2048996029f0f3fc9f22afa3..abebbd8d38f6eeb48a77ed6f652b567ce94f5520 100644 (file)
@@ -261,7 +261,9 @@ static int epoll_event_loop(struct std_event_context *std_ev, struct timeval *tv
                return 0;
        }
 
+       tevent_before_wait(std_ev->ev);
        ret = epoll_wait(std_ev->epoll_fd, events, MAXEVENTS, timeout);
+       tevent_after_wait(std_ev->ev);
 
        if (ret == -1 && errno == EINTR && std_ev->ev->signal_events) {
                if (tevent_common_check_signal(std_ev->ev)) {
index f78cd8f707ded54e65408eb07d18d38a3fe52142..28c9da346aab0420e2076db02e0fd56372ade40c 100644 (file)
@@ -88,3 +88,54 @@ int ev_set_blocking(int fd, bool set)
        return fcntl( fd, F_SETFL, val);
 #undef FLAG_TO_SET
 }
+
+static struct timeval tevent_before_wait_ts;
+static struct timeval tevent_after_wait_ts;
+
+/*
+ * measure the time difference between multiple arrivals
+ * to the point where we wait for new events to come in
+ *
+ * allows to measure how long it takes to work on a 
+ * event
+ */
+void tevent_before_wait(struct event_context *ev) {
+
+       struct timeval diff;
+       struct timeval now = tevent_timeval_current();
+
+       if (!tevent_timeval_is_zero(&tevent_after_wait_ts)) {
+               diff = tevent_timeval_until(&tevent_after_wait_ts, &now);
+               if (diff.tv_sec > 3) {
+                       tevent_debug(ev, TEVENT_DEBUG_FATAL,  __location__ 
+                                    " Handling event took %d seconds!",
+                                    (int) diff.tv_sec);
+               }
+       }
+
+       tevent_before_wait_ts = tevent_timeval_current();
+
+}
+
+/*
+ * measure how long the select()/epoll() call took
+ *
+ * allows to measure how long we are waiting for new events
+ */
+void tevent_after_wait(struct event_context *ev) {
+
+       struct timeval diff;
+       struct timeval now = tevent_timeval_current();
+
+       if (!tevent_timeval_is_zero(&tevent_before_wait_ts)) {
+               diff = tevent_timeval_until(&tevent_before_wait_ts, &now);
+               if (diff.tv_sec > 3) {
+                       tevent_debug(ev, TEVENT_DEBUG_FATAL,  __location__
+                                    " No event for %d seconds!",
+                                    (int) diff.tv_sec);
+               }
+       }
+
+       tevent_after_wait_ts = tevent_timeval_current();
+
+}
index 46a4506dac4ec74edab1ceea1a3c423742766c77..2852e9f26ae15c3ee59327d57e81aab85d86d421 100644 (file)
@@ -184,6 +184,10 @@ const char **ev_str_list_add(const char **list, const char *s);
 int ev_set_blocking(int fd, bool set);
 size_t ev_str_list_length(const char **list);
 
+void tevent_before_wait(struct event_context *ev);
+void tevent_after_wait(struct event_context *ev);
+
+
 /* Defined here so we can build against older talloc versions that don't
  * have this define yet. */
 #ifndef TALLOC_FREE