Update latency countes to show min/max and average
authorRonnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 11 Oct 2010 04:11:18 +0000 (15:11 +1100)
committerRonnie Sahlberg <ronniesahlberg@gmail.com>
Mon, 11 Oct 2010 04:12:24 +0000 (15:12 +1100)
include/ctdb_private.h
include/ctdb_protocol.h
server/ctdb_daemon.c
server/ctdb_lockwait.c
server/ctdb_persistent.c
tools/ctdb.c

index f018554dd71e74c932bd408c94e326f48c2bdcaa..a99b4b03381589c2c97e14262596f353a1e8b248 100644 (file)
@@ -304,10 +304,21 @@ struct ctdb_daemon_data {
 
 #define CTDB_UPDATE_RECLOCK_LATENCY(ctdb, name, counter, value) \
        {                                                                               \
-               if (value > ctdb->statistics.counter)                                   \
-                       ctdb->statistics.counter = value;                               \
-               if (value > ctdb->statistics_current.counter)                           \
-                       ctdb->statistics_current.counter = value;                       \
+               if (value > ctdb->statistics.counter.max)                                       \
+                       ctdb->statistics.counter.max = value;                           \
+               if (value > ctdb->statistics_current.counter.max)                               \
+                       ctdb->statistics_current.counter.max = value;                   \
+                                                                                       \
+               if (ctdb->statistics.counter.num == 0 || value < ctdb->statistics.counter.min)  \
+                       ctdb->statistics.counter.min = value;                           \
+               if (ctdb->statistics_current.counter.num == 0 || value < ctdb->statistics_current.counter.min)  \
+                       ctdb->statistics_current.counter.min = value;                   \
+                                                                                       \
+               ctdb->statistics.counter.total += value;                                        \
+               ctdb->statistics_current.counter.total += value;                                \
+                                                                                       \
+               ctdb->statistics.counter.num++;                                         \
+               ctdb->statistics_current.counter.num++;                                 \
                                                                                        \
                if (ctdb->tunable.reclock_latency_ms != 0) {                            \
                        if (value*1000 > ctdb->tunable.reclock_latency_ms) {            \
@@ -320,10 +331,22 @@ struct ctdb_daemon_data {
 #define CTDB_UPDATE_LATENCY(ctdb, db, operation, counter, t) \
        {                                                                               \
                double l = timeval_elapsed(&t);                                         \
-               if (l > ctdb->statistics.counter)                                       \
-                       ctdb->statistics.counter = l;                                   \
-               if (l > ctdb->statistics_current.counter)                               \
-                       ctdb->statistics_current.counter = l;                           \
+                                                                                       \
+               if (l > ctdb->statistics.counter.max)                                   \
+                       ctdb->statistics.counter.max = l;                               \
+               if (l > ctdb->statistics_current.counter.max)                           \
+                       ctdb->statistics_current.counter.max = l;                       \
+                                                                                       \
+               if (ctdb->statistics.counter.num == 0 || l < ctdb->statistics.counter.min)      \
+                       ctdb->statistics.counter.min = l;                               \
+               if (ctdb->statistics_current.counter.num == 0 || l < ctdb->statistics_current.counter.min)      \
+                       ctdb->statistics_current.counter.min = l;                       \
+                                                                                       \
+               ctdb->statistics.counter.total += l;                                    \
+               ctdb->statistics_current.counter.total += l;                            \
+                                                                                       \
+               ctdb->statistics.counter.num++;                                         \
+               ctdb->statistics_current.counter.num++;                                 \
                                                                                        \
                if (ctdb->tunable.log_latency_ms !=0) {                                 \
                        if (l*1000 > ctdb->tunable.log_latency_ms) {                    \
index 750f1f091bea49d829aaffe766dd34df28236cf4..1568460a03235f0c7dd8e04f95e7b88d513c9cc3 100644 (file)
@@ -536,6 +536,13 @@ struct ctdb_all_public_ips {
 };
 
 
+struct latency_counter {
+       int num;
+       double min;
+       double max;
+       double total;
+};
+
 /*
   ctdb statistics information
  */
@@ -570,8 +577,8 @@ struct ctdb_statistics {
                uint32_t traverse;
        } timeouts;
        struct {
-               double ctdbd;
-               double recd;
+               struct latency_counter ctdbd;
+               struct latency_counter recd;
        } reclock;
        uint32_t total_calls;
        uint32_t pending_calls;
@@ -582,9 +589,9 @@ struct ctdb_statistics {
        uint32_t memory_used;
        uint32_t __last_counter; /* hack for control_statistics_all */
        uint32_t max_hop_count;
-       double max_call_latency;
-       double max_lockwait_latency;
-       double max_childwrite_latency;
+       struct latency_counter call_latency;
+       struct latency_counter lockwait_latency;
+       struct latency_counter childwrite_latency;
        uint32_t num_recoveries;
        struct timeval statistics_start_time;
        struct timeval statistics_current_time;
index 16ccaacacccc764d3913cd9c4e026f79baef76cb..12ad9543c46005f65ae272f47d0e79c7371ac024 100644 (file)
@@ -258,7 +258,7 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state)
                DEBUG(DEBUG_ERR, (__location__ " ctdbd_call_recv() returned error\n"));
                CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
 
-               CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 1", max_call_latency, dstate->start_time);
+               CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 1", call_latency, dstate->start_time);
                return;
        }
 
@@ -268,7 +268,7 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state)
        if (r == NULL) {
                DEBUG(DEBUG_ERR, (__location__ " Failed to allocate reply_call in ctdb daemon\n"));
                CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
-               CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 2", max_call_latency, dstate->start_time);
+               CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 2", call_latency, dstate->start_time);
                return;
        }
        r->hdr.reqid        = dstate->reqid;
@@ -283,7 +283,7 @@ static void daemon_call_from_client_callback(struct ctdb_call_state *state)
        if (res != 0) {
                DEBUG(DEBUG_ERR, (__location__ " Failed to queue packet from daemon to client\n"));
        }
-       CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 3", max_call_latency, dstate->start_time);
+       CTDB_UPDATE_LATENCY(client->ctdb, ctdb_db, "call_from_client_cb 3", call_latency, dstate->start_time);
        CTDB_DECREMENT_STAT(client->ctdb, pending_calls);
        talloc_free(dstate);
 }
@@ -409,7 +409,7 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
 
                DEBUG(DEBUG_ERR,(__location__ " Unable to allocate call\n"));
                CTDB_DECREMENT_STAT(ctdb, pending_calls);
-               CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 1", max_call_latency, dstate->start_time);
+               CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 1", call_latency, dstate->start_time);
                return;
        }
 
@@ -433,7 +433,7 @@ static void daemon_request_call_from_client(struct ctdb_client *client,
        if (state == NULL) {
                DEBUG(DEBUG_ERR,(__location__ " Unable to setup call send\n"));
                CTDB_DECREMENT_STAT(ctdb, pending_calls);
-               CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 2", max_call_latency, dstate->start_time);
+               CTDB_UPDATE_LATENCY(ctdb, ctdb_db, "call_from_client 2", call_latency, dstate->start_time);
                return;
        }
        talloc_steal(state, dstate);
index 9bbc25f9378b1d105bfadcdf0a4c18a934b58711..48198fd1f67f313a0ae58c9d435826b15157d13f 100644 (file)
@@ -53,7 +53,7 @@ static void lockwait_handler(struct event_context *ev, struct fd_event *fde,
        key.dptr = talloc_memdup(tmp_ctx, key.dptr, key.dsize);
 
        talloc_set_destructor(h, NULL);
-       CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "lockwait", max_lockwait_latency, h->start_time);
+       CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "lockwait", lockwait_latency, h->start_time);
        CTDB_DECREMENT_STAT(h->ctdb, pending_lockwait_calls);
 
        /* the handle needs to go away when the context is gone - when
index 8672c5b3094b27d533166a4339b11875a92b3355..9346f7d0da4517a6f0f566eb2b046db597b36b5f 100644 (file)
@@ -475,7 +475,7 @@ static void childwrite_handler(struct event_context *ev, struct fd_event *fde,
        int ret;
        char c;
 
-       CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", max_childwrite_latency, h->start_time);
+       CTDB_UPDATE_LATENCY(h->ctdb, h->ctdb_db, "persistent", childwrite_latency, h->start_time);
        CTDB_DECREMENT_STAT(h->ctdb, pending_childwrite_calls);
 
        /* the handle needs to go away when the context is gone - when
index 8d422552aca693a2b4242b1dd70238b6cd192ef9..0409a40be67f1858af76118df4e18efe0dcb1e0f 100644 (file)
@@ -219,11 +219,29 @@ static void show_statistics(struct ctdb_statistics *s, int show_header)
                        for (i=0;i<ARRAY_SIZE(fields);i++) {
                                printf("%s:", fields[i].name);
                        }
-                       printf("max_reclock_ctdbd:");
-                       printf("max_reclock_recd:");
+                       printf("num_reclock_ctdbd_latency:");
+                       printf("min_reclock_ctdbd_latency:");
+                       printf("avg_reclock_ctdbd_latency:");
+                       printf("max_reclock_ctdbd_latency:");
+
+                       printf("num_reclock_recd_latency:");
+                       printf("min_reclock_recd_latency:");
+                       printf("avg_reclock_recd_latency:");
+                       printf("max_reclock_recd_latency:");
+
+                       printf("num_call_latency:");
+                       printf("min_call_latency:");
+                       printf("avg_call_latency:");
                        printf("max_call_latency:");
+
+                       printf("num_lockwait_latency:");
+                       printf("min_lockwait_latency:");
+                       printf("avg_lockwait_latency:");
                        printf("max_lockwait_latency:");
-                       printf("max_childwrite_latency:");
+
+                       printf("num_childwrite_latency:");
+                       printf("min_childwrite_latency:");
+                       printf("avg_childwrite_latency:");
                        printf("max_childwrite_latency:");
                        printf("\n");
                }
@@ -233,12 +251,30 @@ static void show_statistics(struct ctdb_statistics *s, int show_header)
                for (i=0;i<ARRAY_SIZE(fields);i++) {
                        printf("%d:", *(uint32_t *)(fields[i].offset+(uint8_t *)s));
                }
-               printf("%.6f:", s->reclock.ctdbd);
-               printf("%.6f:", s->reclock.recd);
-               printf("%.6f:", s->max_call_latency);
-               printf("%.6f:", s->max_lockwait_latency);
-               printf("%.6f:", s->max_childwrite_latency);
-               printf("%.6f:", s->max_childwrite_latency);
+               printf("%d:", s->reclock.ctdbd.num);
+               printf("%.6f:", s->reclock.ctdbd.min);
+               printf("%.6f:", s->reclock.ctdbd.num?s->reclock.ctdbd.total/s->reclock.ctdbd.num:0.0);
+               printf("%.6f:", s->reclock.ctdbd.max);
+
+               printf("%d:", s->reclock.recd.num);
+               printf("%.6f:", s->reclock.recd.min);
+               printf("%.6f:", s->reclock.recd.num?s->reclock.recd.total/s->reclock.recd.num:0.0);
+               printf("%.6f:", s->reclock.recd.max);
+
+               printf("%d:", s->call_latency.num);
+               printf("%.6f:", s->call_latency.min);
+               printf("%.6f:", s->call_latency.num?s->call_latency.total/s->call_latency.num:0.0);
+               printf("%.6f:", s->call_latency.max);
+
+               printf("%d:", s->lockwait_latency.num);
+               printf("%.6f:", s->lockwait_latency.min);
+               printf("%.6f:", s->lockwait_latency.num?s->lockwait_latency.total/s->lockwait_latency.num:0.0);
+               printf("%.6f:", s->lockwait_latency.max);
+
+               printf("%d:", s->childwrite_latency.num);
+               printf("%.6f:", s->childwrite_latency.min);
+               printf("%.6f:", s->childwrite_latency.num?s->childwrite_latency.total/s->childwrite_latency.num:0.0);
+               printf("%.6f:", s->childwrite_latency.max);
                printf("\n");
        } else {
                printf("CTDB version %u\n", CTDB_VERSION);
@@ -261,13 +297,13 @@ static void show_statistics(struct ctdb_statistics *s, int show_header)
                               preflen?0:4, "",
                               *(uint32_t *)(fields[i].offset+(uint8_t *)s));
                }
-               printf(" %-30s     %.6f sec\n", "max_reclock_ctdbd", s->reclock.ctdbd);
-               printf(" %-30s     %.6f sec\n", "max_reclock_recd", s->reclock.recd);
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "reclock_ctdbd       MIN/AVG/MAX", s->reclock.ctdbd.min, s->reclock.ctdbd.num?s->reclock.ctdbd.total/s->reclock.ctdbd.num:0.0, s->reclock.ctdbd.max, s->reclock.ctdbd.num);
+
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "reclock_recd       MIN/AVG/MAX", s->reclock.recd.min, s->reclock.recd.num?s->reclock.recd.total/s->reclock.recd.num:0.0, s->reclock.recd.max, s->reclock.recd.num);
 
-               printf(" %-30s     %.6f sec\n", "max_call_latency", s->max_call_latency);
-               printf(" %-30s     %.6f sec\n", "max_lockwait_latency", s->max_lockwait_latency);
-               printf(" %-30s     %.6f sec\n", "max_childwrite_latency", s->max_childwrite_latency);
-               printf(" %-30s     %.6f sec\n", "max_childwrite_latency", s->max_childwrite_latency);
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "call_latency       MIN/AVG/MAX", s->call_latency.min, s->call_latency.num?s->call_latency.total/s->call_latency.num:0.0, s->call_latency.max, s->call_latency.num);
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "lockwait_latency   MIN/AVG/MAX", s->lockwait_latency.min, s->lockwait_latency.num?s->lockwait_latency.total/s->lockwait_latency.num:0.0, s->lockwait_latency.max, s->lockwait_latency.num);
+               printf(" %-30s     %.6f/%.6f/%.6f sec out of %d\n", "childwrite_latency MIN/AVG/MAX", s->childwrite_latency.min, s->childwrite_latency.num?s->childwrite_latency.total/s->childwrite_latency.num:0.0, s->childwrite_latency.max, s->childwrite_latency.num);
        }
 
        talloc_free(tmp_ctx);
@@ -305,10 +341,10 @@ static int control_statistics_all(struct ctdb_context *ctdb)
                }
                statistics.max_hop_count = 
                        MAX(statistics.max_hop_count, s1.max_hop_count);
-               statistics.max_call_latency = 
-                       MAX(statistics.max_call_latency, s1.max_call_latency);
-               statistics.max_lockwait_latency = 
-                       MAX(statistics.max_lockwait_latency, s1.max_lockwait_latency);
+               statistics.call_latency.max = 
+                       MAX(statistics.call_latency.max, s1.call_latency.max);
+               statistics.lockwait_latency.max = 
+                       MAX(statistics.lockwait_latency.max, s1.lockwait_latency.max);
        }
        talloc_free(nodes);
        printf("Gathered statistics for %u nodes\n", num_nodes);