Add rolling statistics that are collected across 10 second intervals.
author Ronnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 29 Sep 2010 02:13:05 +0000 (12:13 +1000)
committer Ronnie Sahlberg <ronniesahlberg@gmail.com>
Wed, 29 Sep 2010 02:14:45 +0000 (12:14 +1000)
Add a new command "ctdb stats [num]" that prints the [num] most recent statistics intervals collected.

Makefile.in
client/ctdb_client.c
include/ctdb_client.h
include/ctdb_private.h
include/ctdb_protocol.h
server/ctdb_control.c
server/ctdb_daemon.c
server/ctdb_statistics.c [new file with mode: 0644]
tools/ctdb.c

index 369cec0b6e2399778b8b2ffb02f777789408ecf4..849d6263fc9df9808bacf8f70583f79e099c2bc8 100755 (executable)
@@ -63,7 +63,7 @@ CTDB_SERVER_OBJ = server/ctdbd.o server/ctdb_daemon.o server/ctdb_lockwait.o \
        server/ctdb_traverse.o server/eventscript.o server/ctdb_takeover.o \
        server/ctdb_serverids.o server/ctdb_persistent.o \
        server/ctdb_keepalive.o server/ctdb_logging.o server/ctdb_uptime.o \
-       server/ctdb_vacuum.o server/ctdb_banning.o \
+       server/ctdb_vacuum.o server/ctdb_banning.o server/ctdb_statistics.o \
        $(CTDB_CLIENT_OBJ) $(CTDB_TCP_OBJ) @INFINIBAND_WRAPPER_OBJ@
 
 TEST_BINS=tests/bin/ctdb_bench tests/bin/ctdb_fetch tests/bin/ctdb_fetch_one \
index 6215dc06ddea091bac673e0f0708fb9557dc6a22..4c770fda0b5f1fc3d73f1dcc77db93260dd64a27 100644 (file)
@@ -4216,3 +4216,23 @@ int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout,
 
        return 0;
 }
+
+/*
+  fetch the rolling statistics history from a node
+
+  Sends CTDB_CONTROL_GET_STAT_HISTORY to destnode.  On success *stats
+  points to a copy of the reply, allocated on mem_ctx.
+
+  Returns 0 on success.  Returns -1 if the control fails, the reply is
+  empty, or the copy of the reply cannot be allocated.
+ */
+int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats)
+{
+       int ret;
+       TDB_DATA outdata;
+       int32_t res;
+
+       ret = ctdb_control(ctdb, destnode, 0,
+                          CTDB_CONTROL_GET_STAT_HISTORY, 0, tdb_null,
+                          mem_ctx, &outdata, &res, &timeout, NULL);
+       if (ret != 0 || res != 0 || outdata.dsize == 0) {
+               DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getstathistory failed ret:%d res:%d\n", ret, res));
+               return -1;
+       }
+
+       *stats = (struct ctdb_statistics_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
+       /* the reply buffer is no longer needed, whether or not the copy succeeded */
+       talloc_free(outdata.dptr);
+       if (*stats == NULL) {
+               /* do not report success with a NULL result: callers dereference *stats */
+               DEBUG(DEBUG_ERR,(__location__ " Failed to allocate memory for statistics history\n"));
+               return -1;
+       }
+
+       return 0;
+}
index 2b709a0818ecacd15ea53ef6d56e67d9c59988c5..b2f65ee24878c9998a4593261beef53a900c68d3 100644 (file)
@@ -588,4 +588,7 @@ struct ctdb_db_priority {
 int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio);
 int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority);
 
+/*
+  fetch the rolling statistics history from destnode using
+  CTDB_CONTROL_GET_STAT_HISTORY. On success *stats is a copy of the
+  reply allocated on mem_ctx. Returns 0 on success, -1 on failure.
+ */
+int ctdb_ctrl_getstathistory(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_statistics_wire **stats);
+
+
 #endif /* _CTDB_CLIENT_H */
index 9f62132bbff6251603311ccaecbfcde5866e4a23..2394b4486aa5c03a621484d4105886004adca167 100644 (file)
@@ -334,60 +334,6 @@ struct ctdb_daemon_data {
 
 
 
-/*
-  ctdb statistics information
- */
-struct ctdb_statistics {
-       uint32_t num_clients;
-       uint32_t frozen;
-       uint32_t recovering;
-       uint32_t client_packets_sent;
-       uint32_t client_packets_recv;
-       uint32_t node_packets_sent;
-       uint32_t node_packets_recv;
-       uint32_t keepalive_packets_sent;
-       uint32_t keepalive_packets_recv;
-       struct {
-               uint32_t req_call;
-               uint32_t reply_call;
-               uint32_t req_dmaster;
-               uint32_t reply_dmaster;
-               uint32_t reply_error;
-               uint32_t req_message;
-               uint32_t req_control;
-               uint32_t reply_control;
-       } node;
-       struct {
-               uint32_t req_call;
-               uint32_t req_message;
-               uint32_t req_control;
-       } client;
-       struct {
-               uint32_t call;
-               uint32_t control;
-               uint32_t traverse;
-       } timeouts;
-       struct {
-               double ctdbd;
-               double recd;
-       } reclock;
-       uint32_t total_calls;
-       uint32_t pending_calls;
-       uint32_t lockwait_calls;
-       uint32_t pending_lockwait_calls;
-       uint32_t childwrite_calls;
-       uint32_t pending_childwrite_calls;
-       uint32_t memory_used;
-       uint32_t __last_counter; /* hack for control_statistics_all */
-       uint32_t max_hop_count;
-       double max_call_latency;
-       double max_lockwait_latency;
-       double max_childwrite_latency;
-       uint32_t num_recoveries;
-       struct timeval statistics_start_time;
-       struct timeval statistics_current_time;
-};
-
 
 #define INVALID_GENERATION 1
 /* table that contains the mapping between a hash value and lmaster
@@ -477,6 +423,8 @@ struct ctdb_context {
        struct ctdb_daemon_data daemon;
        struct ctdb_statistics statistics;
        struct ctdb_statistics statistics_current;
+#define MAX_STAT_HISTORY 100
+       struct ctdb_statistics statistics_history[MAX_STAT_HISTORY];
        struct ctdb_vnn_map *vnn_map;
        uint32_t num_clients;
        uint32_t recovery_master;
@@ -1395,6 +1343,10 @@ int update_ip_assignment_tree(struct ctdb_context *ctdb,
 
 int ctdb_init_tevent_logging(struct ctdb_context *ctdb);
 
-int ctdb_update_stat_counter(struct ctdb_context *ctdb, uint32_t *counter, uint32_t value);
+/*
+  zero all statistics (totals, current interval and history) and schedule
+  the timed event that rotates the current interval into the history.
+  Always returns 0.
+ */
+int ctdb_statistics_init(struct ctdb_context *ctdb);
+
+/*
+  handler for CTDB_CONTROL_GET_STAT_HISTORY: fills outdata with a
+  struct ctdb_statistics_wire holding all MAX_STAT_HISTORY history slots.
+  Returns 0 on success, -1 if the reply cannot be allocated.
+ */
+int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
+                                     struct ctdb_req_control *c,
+                                     TDB_DATA *outdata);
 
 #endif
index f04b3f39fbd19394d085219bd1be31f6690d686f..750f1f091bea49d829aaffe766dd34df28236cf4 100644 (file)
@@ -357,6 +357,7 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS          = 0,
                    CTDB_CONTROL_GET_IFACES              = 124,
                    CTDB_CONTROL_SET_IFACE_LINK_STATE    = 125,
                    CTDB_CONTROL_TCP_ADD_DELAYED_UPDATE  = 126,
+                   CTDB_CONTROL_GET_STAT_HISTORY        = 127,
 };
 
 /*
@@ -534,4 +535,68 @@ struct ctdb_all_public_ips {
        struct ctdb_public_ip ips[1];
 };
 
+
+/*
+  ctdb statistics information
+
+  This structure is copied byte-for-byte into the reply of
+  CTDB_CONTROL_GET_STAT_HISTORY (as the elements of
+  struct ctdb_statistics_wire), so changing, adding or reordering
+  fields changes that reply format.
+ */
+struct ctdb_statistics {
+       uint32_t num_clients;
+       uint32_t frozen;
+       uint32_t recovering;
+       uint32_t client_packets_sent;
+       uint32_t client_packets_recv;
+       uint32_t node_packets_sent;
+       uint32_t node_packets_recv;
+       uint32_t keepalive_packets_sent;
+       uint32_t keepalive_packets_recv;
+       /* per packet type counters; presumably for traffic between nodes - TODO confirm */
+       struct {
+               uint32_t req_call;
+               uint32_t reply_call;
+               uint32_t req_dmaster;
+               uint32_t reply_dmaster;
+               uint32_t reply_error;
+               uint32_t req_message;
+               uint32_t req_control;
+               uint32_t reply_control;
+       } node;
+       /* per packet type counters; presumably for requests from local clients - TODO confirm */
+       struct {
+               uint32_t req_call;
+               uint32_t req_message;
+               uint32_t req_control;
+       } client;
+       struct {
+               uint32_t call;
+               uint32_t control;
+               uint32_t traverse;
+       } timeouts;
+       struct {
+               double ctdbd;
+               double recd;
+       } reclock;
+       uint32_t total_calls;
+       uint32_t pending_calls;
+       uint32_t lockwait_calls;
+       uint32_t pending_lockwait_calls;
+       uint32_t childwrite_calls;
+       uint32_t pending_childwrite_calls;
+       uint32_t memory_used;
+       uint32_t __last_counter; /* hack for control_statistics_all */
+       uint32_t max_hop_count;
+       double max_call_latency;
+       double max_lockwait_latency;
+       double max_childwrite_latency;
+       uint32_t num_recoveries;
+       /*
+        * For rolling statistics: start_time is set when a collection
+        * interval begins, current_time when the interval is moved into
+        * the history. A history slot whose start_time is all zero has
+        * never been filled.
+        */
+       struct timeval statistics_start_time;
+       struct timeval statistics_current_time;
+};
+
+/*
+ * wire format for statistics history
+ *
+ * num is the number of entries in stats. stats is declared with one
+ * element but is used as a variable-length trailing array: the daemon
+ * allocates offsetof(struct ctdb_statistics_wire, stats) +
+ * num * sizeof(struct ctdb_statistics) bytes. Entry 0 is the most
+ * recently completed interval.
+ */
+struct ctdb_statistics_wire {
+       uint32_t num;
+       struct ctdb_statistics stats[1];
+};
+
+
 #endif
index 6dd69f35fbb1144e49a988ff37026491cee38483..3356ba1baef65b356ecc3e2612b86cbc5500eafa 100644 (file)
@@ -600,6 +600,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
                CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_control_iface_info));
                return ctdb_control_set_iface_link(ctdb, c, indata);
 
+       case CTDB_CONTROL_GET_STAT_HISTORY:
+               CHECK_CONTROL_DATA_SIZE(0);
+               return ctdb_control_get_stat_history(ctdb, c, outdata);
+
        default:
                DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode));
                return -1;
index 93d6b3acbb4dc281765d441176e8c6c960ab25cb..16ccaacacccc764d3913cd9c4e026f79baef76cb 100644 (file)
@@ -761,6 +761,9 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog)
 
        ctdb_set_child_logging(ctdb);
 
+       /* initialize statistics collection */
+       ctdb_statistics_init(ctdb);
+
        /* force initial recovery for election */
        ctdb->recovery_mode = CTDB_RECOVERY_ACTIVE;
 
diff --git a/server/ctdb_statistics.c b/server/ctdb_statistics.c
new file mode 100644 (file)
index 0000000..243a4f2
--- /dev/null
@@ -0,0 +1,77 @@
+/* 
+   ctdb statistics code
+
+   Copyright (C) Ronnie Sahlberg 2010
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include <string.h>
+#include "lib/tevent/tevent.h"
+#include "../include/ctdb_private.h"
+
+/*
+  timed event handler that closes the current statistics interval
+
+  The history is shifted one slot towards the end (dropping the oldest
+  record), the interval that just finished is stored in slot 0 and
+  stamped with the current time, the current-interval counters are
+  cleared, and the handler re-arms itself to run again in 10 seconds.
+  p is the struct ctdb_context registered with the event.
+ */
+static void ctdb_statistics_update(struct event_context *ev, struct timed_event *te, 
+                                  struct timeval t, void *p)
+{
+       struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
+       struct ctdb_statistics *history = ctdb->statistics_history;
+       struct ctdb_statistics *current = &ctdb->statistics_current;
+
+       /* age every record by one slot; source and destination overlap, hence memmove */
+       memmove(history + 1, history, (MAX_STAT_HISTORY-1)*sizeof(history[0]));
+
+       /* slot 0 becomes the interval that just ended */
+       memcpy(history, current, sizeof(history[0]));
+       history[0].statistics_current_time = timeval_current();
+
+       /* begin a new, empty interval */
+       memset(current, 0, sizeof(*current));
+       current->statistics_start_time = timeval_current();
+
+       /* the timed event has fired, so schedule the next rotation */
+       event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(10, 0), ctdb_statistics_update, ctdb);
+}
+
+/*
+  reset all statistics and start rolling collection
+
+  Clears the running totals, the current interval and the whole history,
+  records the start time of the first interval and schedules
+  ctdb_statistics_update() to run in 10 seconds.
+  Always returns 0.
+ */
+int ctdb_statistics_init(struct ctdb_context *ctdb)
+{
+       memset(&ctdb->statistics, 0, sizeof(ctdb->statistics));
+       memset(ctdb->statistics_history, 0, sizeof(ctdb->statistics_history));
+
+       memset(&ctdb->statistics_current, 0, sizeof(ctdb->statistics_current));
+       ctdb->statistics_current.statistics_start_time = timeval_current();
+
+       event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(10, 0), ctdb_statistics_update, ctdb);
+
+       return 0;
+}
+
+
+/*
+  handler for CTDB_CONTROL_GET_STAT_HISTORY
+
+  Builds a struct ctdb_statistics_wire, allocated as a talloc child of
+  outdata, that carries all MAX_STAT_HISTORY history slots (including
+  slots that have never been filled) and returns it through outdata.
+  Returns 0 on success, -1 if the reply cannot be allocated.
+ */
+int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb, 
+                                     struct ctdb_req_control *c,
+                                     TDB_DATA *outdata)
+{
+       const size_t history_size = sizeof(ctdb->statistics_history);
+       /* header up to the trailing array, followed by every history slot */
+       const size_t reply_size = offsetof(struct ctdb_statistics_wire, stats) + history_size;
+       struct ctdb_statistics_wire *reply;
+
+       reply = talloc_size(outdata, reply_size);
+       if (reply == NULL) {
+               DEBUG(DEBUG_ERR,(__location__ " Failed to allocate statistics history structure\n"));
+               return -1;
+       }
+
+       reply->num = MAX_STAT_HISTORY;
+       memcpy(reply->stats, ctdb->statistics_history, history_size);
+
+       outdata->dptr  = (uint8_t *)reply;
+       outdata->dsize = reply_size;
+
+       return 0;
+}
index 08da51cf69e59dce3249f2f83394b3de4822d7f5..4f8b7598ad424d7a45a9570f9faa14a8fc9286de 100644 (file)
@@ -323,6 +323,38 @@ static int control_statistics_reset(struct ctdb_context *ctdb, int argc, const c
 }
 
 
+/*
+  display remote ctdb rolling statistics
+
+  Fetches the statistics history from node options.pnn and prints every
+  slot that has been filled (non-zero start time), newest first, with a
+  "===" line after each record. With one argument N, output stops right
+  after the record in slot N-1 has been printed; without an argument all
+  filled slots are printed.
+  Returns 0 on success, or the error from ctdb_ctrl_getstathistory().
+ */
+static int control_stats(struct ctdb_context *ctdb, int argc, const char **argv)
+{
+       struct ctdb_statistics_wire *history;
+       int last_slot = -1;     /* -1 never matches a slot index: print everything */
+       int slot;
+       int ret;
+
+       if (argc == 1) {
+               last_slot = atoi(argv[0]) - 1;
+       }
+
+       ret = ctdb_ctrl_getstathistory(ctdb, TIMELIMIT(), options.pnn, ctdb, &history);
+       if (ret != 0) {
+               DEBUG(DEBUG_ERR, ("Unable to get rolling statistics from node %u\n", options.pnn));
+               return ret;
+       }
+
+       for (slot = 0; slot < history->num; slot++) {
+               /* only slots that have been filled carry a start time */
+               if (history->stats[slot].statistics_start_time.tv_sec != 0) {
+                       show_statistics(&history->stats[slot]);
+                       if (slot == last_slot) {
+                               break;
+                       }
+                       printf("===\n");
+               }
+       }
+
+       return 0;
+}
+
+
 /*
   display uptime of remote node
  */
@@ -4679,6 +4711,7 @@ static const struct {
        { "listvars",        control_listvars,          true,   false,  "list tunable variables"},
        { "statistics",      control_statistics,        false,  false, "show statistics" },
        { "statisticsreset", control_statistics_reset,  true,   false,  "reset statistics"},
+       { "stats",           control_stats,             false,  false,  "show rolling statistics", "[number of history records]" },
        { "ip",              control_ip,                false,  false,  "show which public ip's that ctdb manages" },
        { "ipinfo",          control_ipinfo,            true,   false,  "show details about a public ip that ctdb manages", "<ip>" },
        { "ifaces",          control_ifaces,            true,   false,  "show which interfaces that ctdb manages" },