2 * CTDB Performance Metrics Domain Agent (PMDA) for Performance Co-Pilot (PCP)
4 * Copyright (c) 1995,2004 Silicon Graphics, Inc. All Rights Reserved.
5 * Copyright (c) 2011 David Disseldorp
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2 of the License, or (at your
10 * option) any later version.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "system/network.h"
29 #include "lib/util/time.h"
30 #include "lib/util/blocking.h"
32 #include "client/client.h"
33 #include "client/client_sync.h"
35 #include <pcp/pmapi.h>
/*
 * Compatibility shims, compiled only when HAVE___PMID_INT is defined.
 * pmID_cluster() and pmID_item() read the cluster and item members through
 * a pointer, while pmGetProgname() and pmSetProgname() map onto pmProgname
 * and __pmSetProgname().
 * NOTE(review): presumably this targets PCP releases that lack these names
 * natively - confirm. The macro arguments are not parenthesized, so pass
 * only a plain identifier as id.
 */
38 #ifdef HAVE___PMID_INT
41 #define pmID_cluster(id) id->cluster
42 #define pmID_item(id) id->item
43 #define pmGetProgname() pmProgname
44 #define pmSetProgname(a) __pmSetProgname(a)
52 * This PMDA connects to the locally running ctdbd daemon and pulls
53 * statistics for export via PCP. The ctdbd Unix domain socket path can be
54 * specified with the CTDB_SOCKET environment variable; otherwise the default path given by the CTDB_SOCKET constant is used.
59 * All metrics supported in this PMDA - one table entry for each.
60 * The 4th field specifies the serial number of the instance domain
61 * for the metric, and must be either PM_INDOM_NULL (denoting a
62 * metric that only ever has a single value), or the serial number
63 * of one of the instance domains declared in an instance domain table.
64 * This PMDA registers no such table, so every entry uses PM_INDOM_NULL.
/*
 * Metric descriptor table handed to pmdaInit(). Each entry carries a
 * PMDA_PMID(cluster,item) identifier, a value type, an instance domain
 * (PM_INDOM_NULL for every entry), a semantics flag and a PMDA_PMUNITS()
 * tuple. The fetch callback switches on the cluster and passes the item to
 * one of fill_base(), fill_node(), fill_client() or fill_timeout().
 * NOTE(review): the entry counts per cluster (23, 8, 3, 3) match the number
 * of assignments in those four helpers in that order, so cluster 0 is
 * presumably base, 1 node, 2 client and 3 timeout - confirm against the
 * case labels.
 */
66 static pmdaMetric metrictab[] = {
/* cluster 0, items 0-2: instantaneous U32 values with an all-zero units tuple */
68 { NULL, { PMDA_PMID(0,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
69 PMDA_PMUNITS(0,0,0,0,0,0) }, },
71 { NULL, { PMDA_PMID(0,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
72 PMDA_PMUNITS(0,0,0,0,0,0) }, },
74 { NULL, { PMDA_PMID(0,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
75 PMDA_PMUNITS(0,0,0,0,0,0) }, },
76 /* client_packets_sent */
77 { NULL, { PMDA_PMID(0,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
78 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
79 /* client_packets_recv */
80 { NULL, { PMDA_PMID(0,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
81 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
82 /* node_packets_sent */
83 { NULL, { PMDA_PMID(0,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
84 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
85 /* node_packets_recv */
86 { NULL, { PMDA_PMID(0,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
87 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
88 /* keepalive_packets_sent */
89 { NULL, { PMDA_PMID(0,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
90 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
91 /* keepalive_packets_recv */
92 { NULL, { PMDA_PMID(0,8), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
93 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
/* cluster 1, items 0-7: eight U32 counters (presumably the stats->node values - confirm) */
95 { NULL, { PMDA_PMID(1,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
96 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
98 { NULL, { PMDA_PMID(1,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
99 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
101 { NULL, { PMDA_PMID(1,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
102 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
104 { NULL, { PMDA_PMID(1,3), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
105 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
107 { NULL, { PMDA_PMID(1,4), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
108 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
110 { NULL, { PMDA_PMID(1,5), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
111 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
113 { NULL, { PMDA_PMID(1,6), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
114 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
116 { NULL, { PMDA_PMID(1,7), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
117 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
/* cluster 2, items 0-2: three U32 counters (presumably the stats->client values - confirm) */
119 { NULL, { PMDA_PMID(2,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
120 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
122 { NULL, { PMDA_PMID(2,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
123 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
125 { NULL, { PMDA_PMID(2,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
126 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
/*
 * cluster 3, items 0-2: three U32 counters (presumably the stats->timeouts
 * values - confirm).
 * NOTE(review): the last PMDA_PMUNITS argument is a literal 0 here while
 * every other counter spells it PM_COUNT_ONE - confirm the two are
 * equivalent and consider using one spelling.
 */
128 { NULL, { PMDA_PMID(3,0), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
129 PMDA_PMUNITS(0,0,1,0,0,0) }, },
131 { NULL, { PMDA_PMID(3,1), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
132 PMDA_PMUNITS(0,0,1,0,0,0) }, },
134 { NULL, { PMDA_PMID(3,2), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
135 PMDA_PMUNITS(0,0,1,0,0,0) }, },
/* cluster 0 resumes here with items 9-22; table order does not follow PMID order */
137 { NULL, { PMDA_PMID(0,9), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
138 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
140 { NULL, { PMDA_PMID(0,10), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
141 PMDA_PMUNITS(0,0,0,0,0,0) }, },
142 /* locks.num_calls */
143 { NULL, { PMDA_PMID(0,11), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
144 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
145 /* locks.num_pending */
146 { NULL, { PMDA_PMID(0,12), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
147 PMDA_PMUNITS(0,0,0,0,0,0) }, },
148 /* childwrite_calls */
149 { NULL, { PMDA_PMID(0,13), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_COUNTER,
150 PMDA_PMUNITS(0,0,1,0,0,PM_COUNT_ONE) }, },
151 /* pending_childwrite_calls */
152 { NULL, { PMDA_PMID(0,14), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
153 PMDA_PMUNITS(0,0,0,0,0,0) }, },
/* item 15: the only entry whose units tuple names PM_SPACE_BYTE */
155 { NULL, { PMDA_PMID(0,15), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
156 PMDA_PMUNITS(1,0,0,PM_SPACE_BYTE,0,0) }, },
158 { NULL, { PMDA_PMID(0,16), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
159 PMDA_PMUNITS(0,0,0,0,0,0) }, },
/* items 17-21: PM_TYPE_DOUBLE maxima whose units tuple names PM_TIME_SEC */
160 /* reclock.ctdbd.max */
161 { NULL, { PMDA_PMID(0,17), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
162 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
163 /* reclock.recd.max */
164 { NULL, { PMDA_PMID(0,18), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
165 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
166 /* call_latency.max */
167 { NULL, { PMDA_PMID(0,19), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
168 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
169 /* locks.latency.max */
170 { NULL, { PMDA_PMID(0,20), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
171 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
172 /* childwrite_latency.max */
173 { NULL, { PMDA_PMID(0,21), PM_TYPE_DOUBLE, PM_INDOM_NULL, PM_SEM_INSTANT,
174 PMDA_PMUNITS(0,1,0,0,PM_TIME_SEC,0) }, },
176 { NULL, { PMDA_PMID(0,22), PM_TYPE_U32, PM_INDOM_NULL, PM_SEM_INSTANT,
177 PMDA_PMUNITS(0,0,0,0,0,0) }, },
/*
 * File-scope state shared by the connect, disconnect and fetch paths:
 *   ev     - event context created by tevent_context_init()
 *   client - ctdbd connection filled in by ctdb_client_init(); the fetch
 *            path treats NULL as not connected
 *   stats  - statistics filled in by ctdb_ctrl_statistics() on each fetch
 *            and read by the fill_* helpers
 */
180 static struct tevent_context *ev;
181 static struct ctdb_client_context *client;
182 static struct ctdb_statistics *stats;
/*
 * Disconnect callback registered through
 * ctdb_client_set_disconnect_callback(). Reports on stderr that ctdbd is
 * unreachable; args is not used by the visible statements.
 * NOTE(review): pmda_ctdb_fetch() reconnects only when client is NULL, so
 * this callback presumably also clears client - confirm.
 */
185 pmda_ctdb_disconnected(void *args)
187 fprintf(stderr, "ctdbd unreachable\n");
/*
 * Connect to the local ctdbd: create a fresh tevent context in ev, choose
 * the socket path, initialise client over that socket and register
 * pmda_ctdb_disconnected() as the disconnect callback. Failures of the
 * context creation and of the client initialisation are reported on stderr.
 * NOTE(review): the return statements and error cleanup are not among the
 * visible lines; the caller only shows that the int result is checked
 * before reconnect failed is logged - confirm the exact return values.
 */
193 pmda_ctdb_daemon_connect(void)
195 const char *socket_name;
198 ev = tevent_context_init(NULL);
200 fprintf(stderr, "Failed to init event ctx\n");
/* A CTDB_SOCKET value in the environment overrides the default path. */
204 socket_name = getenv("CTDB_SOCKET");
205 if (socket_name == NULL) {
206 socket_name = CTDB_SOCKET;
/* ev is passed as both of the first two arguments of ctdb_client_init(). */
209 ret = ctdb_client_init(ev, ev, socket_name, &client);
211 fprintf(stderr, "Failed to connect to ctdb daemon via %s\n",
216 ctdb_client_set_disconnect_callback(client, pmda_ctdb_disconnected,
/*
 * Counterpart of pmda_ctdb_daemon_connect(); called by pmda_ctdb_fetch()
 * after the statistics control fails, just after it logs that it is
 * reconnecting.
 * NOTE(review): the body is not among the visible lines; it presumably
 * releases client and ev so that the next fetch reconnects - confirm.
 */
228 pmda_ctdb_daemon_disconnect(void)
/*
 * Copy one field of the file-scope stats into atom, selected by item.
 * Integer fields are stored in atom->ul and the five .max fields in
 * atom->d. stats is dereferenced without a check here; the fetch callback
 * logs stats not available before dispatching.
 * NOTE(review): the case labels are not among the visible lines. The 23
 * assignments appear in the same order as cluster 0 items 0-22 in
 * metrictab, so they are presumed to be selected by those item numbers -
 * confirm, along with the value returned for an unknown item.
 */
235 fill_base(unsigned int item, pmAtomValue *atom)
239 atom->ul = stats->num_clients;
242 atom->ul = stats->frozen;
245 atom->ul = stats->recovering;
248 atom->ul = stats->client_packets_sent;
251 atom->ul = stats->client_packets_recv;
254 atom->ul = stats->node_packets_sent;
257 atom->ul = stats->node_packets_recv;
260 atom->ul = stats->keepalive_packets_sent;
263 atom->ul = stats->keepalive_packets_recv;
266 atom->ul = stats->total_calls;
269 atom->ul = stats->pending_calls;
272 atom->ul = stats->locks.num_calls;
275 atom->ul = stats->locks.num_pending;
278 atom->ul = stats->childwrite_calls;
281 atom->ul = stats->pending_childwrite_calls;
284 atom->ul = stats->memory_used;
287 atom->ul = stats->max_hop_count;
/* The five maxima below are stored through atom->d, not atom->ul. */
290 atom->d = stats->reclock.ctdbd.max;
293 atom->d = stats->reclock.recd.max;
296 atom->d = stats->call_latency.max;
299 atom->d = stats->locks.latency.max;
302 atom->d = stats->childwrite_latency.max;
305 atom->ul = stats->num_recoveries;
/*
 * Copy one member of stats->node into atom->ul, selected by item.
 * NOTE(review): the case labels are not among the visible lines; the eight
 * assignments are presumed to map to items 0-7 in this order, matching the
 * eight cluster 1 entries in metrictab - confirm.
 */
315 fill_node(unsigned int item, pmAtomValue *atom)
319 atom->ul = stats->node.req_call;
322 atom->ul = stats->node.reply_call;
325 atom->ul = stats->node.req_dmaster;
328 atom->ul = stats->node.reply_dmaster;
331 atom->ul = stats->node.reply_error;
334 atom->ul = stats->node.req_message;
337 atom->ul = stats->node.req_control;
340 atom->ul = stats->node.reply_control;
/*
 * Copy one member of stats->client into atom->ul, selected by item.
 * NOTE(review): the case labels are not among the visible lines; the three
 * assignments are presumed to map to items 0-2 in this order, matching the
 * three cluster 2 entries in metrictab - confirm.
 */
351 fill_client(unsigned int item, pmAtomValue *atom)
355 atom->ul = stats->client.req_call;
358 atom->ul = stats->client.req_message;
361 atom->ul = stats->client.req_control;
/*
 * Copy one member of stats->timeouts into atom->ul, selected by item.
 * NOTE(review): the case labels are not among the visible lines; the three
 * assignments are presumed to map to items 0-2 in this order, matching the
 * three cluster 3 entries in metrictab - confirm.
 */
371 fill_timeout(unsigned int item, pmAtomValue *atom)
375 atom->ul = stats->timeouts.call;
378 atom->ul = stats->timeouts.control;
381 atom->ul = stats->timeouts.traverse;
391 * callback provided to pmdaFetch
/*
 * Per-metric callback installed with pmdaSetFetchCallBack(). Takes the
 * metric identifier from mdesc->m_desc.pmid, switches on its cluster and
 * lets the matching fill_* helper store the value for its item in atom.
 *   mdesc - descriptor of the metric being fetched
 *   inst  - instance identifier; compared against PM_IN_NULL
 *   atom  - receives the value
 * NOTE(review): the return statements are not among the visible lines.
 * Since every metric uses PM_INDOM_NULL, an inst other than PM_IN_NULL is
 * presumably rejected - confirm the error codes.
 */
394 pmda_ctdb_fetch_cb(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
/* With HAVE___PMID_INT the id is a pointer view of the pmid, otherwise a pmID copy. */
397 #ifdef HAVE___PMID_INT
398 __pmID_int *id = (__pmID_int *)&(mdesc->m_desc.pmid);
400 pmID id = *(pmID *)&(mdesc->m_desc.pmid);
403 if (inst != PM_IN_NULL) {
408 fprintf(stderr, "stats not available\n");
/* Dispatch on the cluster; each helper receives only the item number. */
414 switch (pmID_cluster(id)) {
416 ret = fill_base(pmID_item(id), atom);
422 ret = fill_node(pmID_item(id), atom);
428 ret = fill_client(pmID_item(id), atom);
434 ret = fill_timeout(pmID_item(id), atom);
449 * This routine is called once for each pmFetch(3) operation, so is a
450 * good place to do once-per-fetch functions, such as value caching or
451 * instance domain evaluation.
/*
 * Fetch entry point installed in dp->version.two.fetch. Reconnects to ctdbd
 * when client is NULL, refreshes the file-scope stats with
 * ctdb_ctrl_statistics() for CTDB_CURRENT_NODE, then hands the request to
 * pmdaFetch(), which invokes the per-metric callback. If the statistics
 * control fails, the connection is torn down with
 * pmda_ctdb_daemon_disconnect().
 * NOTE(review): the error returns and any release of stats after
 * pmdaFetch() are not among the visible lines - confirm.
 */
454 pmda_ctdb_fetch(int numpmid, pmID pmidlist[], pmResult **resp, pmdaExt *pmda)
458 if (client == NULL) {
459 fprintf(stderr, "attempting reconnect to ctdbd\n");
460 ret = pmda_ctdb_daemon_connect();
462 fprintf(stderr, "reconnect failed\n");
/*
 * client is passed twice: as the first argument and again as the third.
 * NOTE(review): tevent_timeval_current_ofs(1,0) is presumably a one second
 * timeout - confirm.
 */
467 ret = ctdb_ctrl_statistics(client, ev, client, CTDB_CURRENT_NODE,
468 tevent_timeval_current_ofs(1,0), &stats);
470 fprintf(stderr, "ctdb control for statistics failed, reconnecting\n");
471 pmda_ctdb_daemon_disconnect();
476 ret = pmdaFetch(numpmid, pmidlist, resp, pmda);
/* Prototype for the agent initialiser defined below; it has external linkage. */
483 void pmda_ctdb_init(pmdaInterface *dp);
486 * Initialise the agent
/*
 * Wire this PMDA into the dispatch structure dp: install pmda_ctdb_fetch()
 * as the fetch handler and pmda_ctdb_fetch_cb() as the per-metric callback,
 * then register metrictab through pmdaInit().
 * NOTE(review): the branch taken when dp->status is non-zero is not among
 * the visible lines; presumably it returns early - confirm.
 */
489 pmda_ctdb_init(pmdaInterface *dp)
491 if (dp->status != 0) {
495 dp->version.two.fetch = pmda_ctdb_fetch;
496 pmdaSetFetchCallBack(dp, pmda_ctdb_fetch_cb);
/* No instance domains are registered (NULL, 0); only the metric table and its entry count. */
498 pmdaInit(dp, NULL, 0, metrictab,
499 (sizeof(metrictab) / sizeof(metrictab[0])));
/*
 * Format the help file path as the PCP_PMDAS_DIR configuration value
 * followed by /ctdb/help, into a static buffer of MAXPATHLEN bytes.
 * NOTE(review): the enclosing signature and return are not among the
 * visible lines; main() passes helpfile() to pmdaDaemon(), so this is
 * presumably the body of helpfile() returning buf - confirm.
 */
505 static char buf[MAXPATHLEN];
508 snprintf(buf, sizeof(buf), "%s/ctdb/help",
509 pmGetConfig("PCP_PMDAS_DIR"));
/*
 * Print the command line summary to stderr: a Usage line carrying the
 * program name from pmGetProgname(), then the -d and -l options and the
 * mutually exclusive -i, -p and -u connection options.
 * NOTE(review): the enclosing function signature, the call that emits the
 * option text and whatever follows it are not among the visible lines -
 * confirm the name and whether it exits.
 */
517 fprintf(stderr, "Usage: %s [options]\n\n", pmGetProgname());
519 " -d domain use domain (numeric) for metrics domain of PMDA\n"
520 " -l logfile write log into logfile rather than using default log name\n"
521 "\nExactly one of the following options may appear:\n"
522 " -i port expect PMCD to connect on given inet port (number or name)\n"
523 " -p expect PMCD to supply stdin/stdout (pipe)\n"
524 " -u socket expect PMCD to connect on given unix domain socket\n",
530 * Set up the agent if running as a daemon.
/*
 * Daemon entry point. Records the program name, initialises dispatch as a
 * PMDA_INTERFACE_2 daemon for domain CTDB with pmda_ctdb.log as the default
 * log file and the help file path from helpfile(), parses the d, i, l, p
 * and u options, then opens the log, runs pmda_ctdb_init() and calls
 * pmdaConnect().
 * NOTE(review): the declaration of err, the body of the option-error
 * branch and everything after pmdaConnect() are not among the visible
 * lines - confirm.
 */
533 main(int argc, char **argv)
536 char log_file[] = "pmda_ctdb.log";
537 pmdaInterface dispatch;
539 pmSetProgname(argv[0]);
541 pmdaDaemon(&dispatch, PMDA_INTERFACE_2, argv[0], CTDB,
542 log_file, helpfile());
/* Any result other than EOF from pmdaGetOpt() enters the error branch. */
544 if (pmdaGetOpt(argc, argv, "d:i:l:pu:?", &dispatch, &err) != EOF) {
552 pmdaOpenLog(&dispatch);
553 pmda_ctdb_init(&dispatch);
554 pmdaConnect(&dispatch);