4 Copyright (C) Andrew Tridgell 2006
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "system/network.h"
24 #include "system/filesys.h"
25 #include "system/wait.h"
26 #include "system/shmem.h"
27 #include "../include/ctdb_private.h"
/* Global log verbosity, initialised to DEBUG_NOTICE.
   NOTE(review): presumably the threshold the DEBUG() macro compares
   against - confirm in the logging header. */
29 int LogLevel = DEBUG_NOTICE;
/* Initialised to 0. NOTE(review): looks like the level of the message
   currently being emitted; nothing in this file establishes that. */
30 int this_log_level = 0;
/* Ring-buffer logging hook; starts out unset (NULL). */
32 ctdb_ringbuf_log_fn *ctdb_ringbuf_log = NULL;
37 return error string for last error
/* Accessor for the error text remembered on a context.
   ctdb - context to query
   NOTE(review): expected to hand back ctdb->err_msg, the field written by
   ctdb_set_error(); confirm against the function body. */
39 const char *ctdb_errstr(struct ctdb_context *ctdb)
46 remember an error message
/* printf-style setter for the per-context error message.
   ctdb - context whose err_msg is replaced
   fmt  - format string, followed by the values it refers to */
48 void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...)
/* drop the previously stored message before replacing it */
51 talloc_free(ctdb->err_msg);
/* the new message is allocated as a talloc child of ctdb */
53 ctdb->err_msg = talloc_vasprintf(ctdb, fmt, ap);
/* every recorded error is also logged at DEBUG_ERR.
   NOTE(review): err_msg is not checked for NULL before being passed to
   the %s conversion - confirm how DEBUG copes with a failed allocation. */
54 DEBUG(DEBUG_ERR,("ctdb error: %s\n", ctdb->err_msg));
59 a fatal internal error occurred - no hope for recovery
/* Report an unrecoverable internal error.
   ctdb - context (not referenced by the logging statement below)
   msg  - text appended to the alert
   NOTE(review): the description says there is no hope for recovery, so
   the process is presumably terminated after logging - confirm. */
61 void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
/* logged at DEBUG_ALERT */
63 DEBUG(DEBUG_ALERT,("ctdb fatal error: %s\n", msg));
/* Fill in a ctdb_address from a host string.
   ctdb    - context, handed to CTDB_NO_MEMORY for error reporting
   mem_ctx - talloc parent for the copied address string
   str     - host text; it is duplicated as a whole, no port is split off
   address - output: address->address and address->port are written */
70 int ctdb_parse_address(struct ctdb_context *ctdb,
71 TALLOC_CTX *mem_ctx, const char *str,
72 struct ctdb_address *address)
/* look the port up under the service name ctdb, protocol tcp */
77 se = getservbyname("ctdb", "tcp");
80 address->address = talloc_strdup(mem_ctx, str);
/* NOTE(review): CTDB_NO_MEMORY presumably returns an error from this
   function when the pointer is NULL - confirm in ctdb_private.h */
81 CTDB_NO_MEMORY(ctdb, address->address);
/* compiled-in default port; presumably used when the services lookup
   found nothing - confirm the surrounding condition */
84 address->port = CTDB_PORT;
/* s_port is in network byte order, address->port is kept in host order */
86 address->port = ntohs(se->s_port);
93 check if two addresses are the same
/* Compare two ctdb_address values.
   Returns true only when the address strings are equal under strcmp and
   the ports are equal. The comparison is textual, so two different
   spellings of the same host do not match. Neither address string may be
   NULL, since both are passed straight to strcmp. */
95 bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2)
97 return strcmp(a1->address, a2->address) == 0 && a1->port == a2->port;
102 hash function for mapping data to a VNN - taken from tdb
/* Hash a key to a 32 bit value.
   key - dptr/dsize pair; dsize bytes are read from dptr
   The running value is seeded from the key length, each key byte is added
   after being shifted left by (i*5 mod 24) bits, and the total is passed
   through one multiply-and-add step. The sums are accumulated in a
   uint32_t.
   NOTE(review): the description ties this hash to VNN mapping, so the
   exact output presumably has to stay stable across nodes and versions -
   confirm before touching the arithmetic. */
104 uint32_t ctdb_hash(const TDB_DATA *key)
106 uint32_t value; /* Used to compute the hash value. */
107 uint32_t i; /* Used to cycle through random values. */
109 /* Set the initial value from the key size. */
110 for (value = 0x238F13AF * key->dsize, i=0; i < key->dsize; i++)
111 value = (value + (key->dptr[i] << (i*5 % 24)));
/* final scrambling step */
113 return (1103515243 * value + 12345);
117 a type checking variant of idr_find
/* Look up id in idp and verify the talloc type name of what was found.
   idp      - idr tree to search
   id       - identifier to look up
   type     - talloc name the stored pointer is expected to carry
   location - caller location text, only used in the log message
   NOTE(review): a type mismatch is presumably reported to the caller as
   NULL and a match as the pointer itself - confirm. */
119 static void *_idr_find_type(struct idr_context *idp, int id, const char *type, const char *location)
121 void *p = idr_find(idp, id);
/* found something, but talloc_check_name rejects it as the wrong type */
122 if (p && talloc_check_name(p, type) == NULL) {
123 DEBUG(DEBUG_ERR,("%s idr_find_type expected type %s but got %s\n",
124 location, type, talloc_get_name(p)));
132 update a max latency number
/* Account for the latency of one database operation.
   ctdb_db - database the operation ran on (its name is used in the log)
   name    - operation name for the log message
   latency - running maximum kept by the caller
   t       - start time of the operation
   NOTE(review): per the description *latency is raised to the measured
   value when that is larger; confirm where that assignment happens. */
134 void ctdb_latency(struct ctdb_db_context *ctdb_db, const char *name, double *latency, struct timeval t)
/* l is in seconds: it is multiplied by 1000 before being compared with
   the millisecond tunable below */
136 double l = timeval_elapsed(&t);
/* a tunable of 0 switches the warning off */
141 if (ctdb_db->ctdb->tunable.log_latency_ms !=0) {
142 if (l*1000 > ctdb_db->ctdb->tunable.log_latency_ms) {
143 DEBUG(DEBUG_WARNING, ("High latency %.6fs for operation %s on database %s\n", l, name, ctdb_db->db_name));
149 update a reclock latency number
/* Account for the latency of one recovery-lock operation.
   ctdb    - context holding the reclock_latency_ms tunable
   name    - operation name for the log message
   latency - running maximum kept by the caller
   l       - measured latency; multiplied by 1000 before it is compared
             with the millisecond tunable, so it is in seconds
   NOTE(review): *latency is presumably raised to l when l is larger -
   confirm where that assignment happens. */
151 void ctdb_reclock_latency(struct ctdb_context *ctdb, const char *name, double *latency, double l)
/* a tunable of 0 switches the message off */
157 if (ctdb->tunable.reclock_latency_ms !=0) {
158 if (l*1000 > ctdb->tunable.reclock_latency_ms) {
/* unlike ctdb_latency this is logged at DEBUG_ERR */
159 DEBUG(DEBUG_ERR, ("High RECLOCK latency %fs for operation %s\n", l, name));
164 uint32_t ctdb_reqid_new(struct ctdb_context *ctdb, void *state)
168 id = ctdb->idr_cnt++ & 0xFFFF;
169 id |= (idr_get_new(ctdb->idr, state, 0xFFFF)<<16);
/* Find the state registered for a request id.
   ctdb     - context owning the idr tree
   reqid    - id as produced by ctdb_reqid_new()
   type     - expected talloc type name of the state
   location - caller location, forwarded for diagnostics
   NOTE(review): presumably returns the pointer found, or NULL - confirm. */
173 void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *type, const char *location)
/* only the upper 16 bits of reqid are used as the idr key */
177 p = _idr_find_type(ctdb->idr, (reqid>>16)&0xFFFF, type, location);
/* the full reqid is what gets logged, not just the idr part */
179 DEBUG(DEBUG_WARNING, ("Could not find idr:%u\n",reqid));
/* Release the idr slot behind a request id.
   ctdb  - context owning the idr tree
   reqid - id as produced by ctdb_reqid_new()
   Nothing is reported back to the caller (void); a failed removal is
   only logged. */
186 void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid)
/* only the upper 16 bits of reqid are used as the idr key */
190 ret = idr_remove(ctdb->idr, (reqid>>16)&0xFFFF);
192 DEBUG(DEBUG_ERR, ("Removing idr that does not exist\n"));
198 form a ctdb_rec_data record from a key/data pair
200 note that header may be NULL. If not NULL then it is included in the data portion
/* Build one ctdb_rec_data blob on mem_ctx.
   Payload layout in d->data:  key bytes | optional header | data bytes
   keylen is the key size; datalen covers the header (when given) plus the
   data bytes.
   NOTE(review): reqid is presumably stored in the record along with the
   total length, and NULL returned when the allocation fails - confirm. */
203 struct ctdb_rec_data *ctdb_marshall_record(TALLOC_CTX *mem_ctx, uint32_t reqid,
205 struct ctdb_ltdb_header *header,
209 struct ctdb_rec_data *d;
/* fixed part of the struct up to the data member, plus the payload */
211 length = offsetof(struct ctdb_rec_data, data) + key.dsize +
212 data.dsize + (header?sizeof(*header):0);
213 d = (struct ctdb_rec_data *)talloc_size(mem_ctx, length);
219 d->keylen = key.dsize;
220 memcpy(&d->data[0], key.dptr, key.dsize);
/* with a header: the header sits between the key and the data */
222 d->datalen = data.dsize + sizeof(*header);
223 memcpy(&d->data[key.dsize], header, sizeof(*header));
224 memcpy(&d->data[key.dsize+sizeof(*header)], data.dptr, data.dsize);
/* without a header: the data follows the key directly */
226 d->datalen = data.dsize;
227 memcpy(&d->data[key.dsize], data.dptr, data.dsize);
233 /* helper function for marshalling multiple records */
/* Append one record to a marshall buffer and return the buffer.
   mem_ctx - talloc parent for the temporary record and for the buffer
   m       - buffer so far
   header  - optional header, forwarded to ctdb_marshall_record()
   The talloc size of the buffer doubles as its fill level: the new record
   is copied to the old end after the buffer has been grown.
   NOTE(review): the buffer may move when it is grown, so callers should
   presumably continue with the returned pointer only - confirm. */
234 struct ctdb_marshall_buffer *ctdb_marshall_add(TALLOC_CTX *mem_ctx,
235 struct ctdb_marshall_buffer *m,
239 struct ctdb_ltdb_header *header,
242 struct ctdb_rec_data *r;
243 size_t m_size, r_size;
244 struct ctdb_marshall_buffer *m2;
/* build the record as a separate allocation first */
246 r = ctdb_marshall_record(mem_ctx, reqid, key, header, data);
/* fresh buffer: zeroed and holding only the fixed part up to data;
   presumably done when the caller passed m as NULL - confirm */
253 m = talloc_zero_size(mem_ctx, offsetof(struct ctdb_marshall_buffer, data));
260 m_size = talloc_get_size(m);
261 r_size = talloc_get_size(r);
/* grow by exactly the size of the new record */
263 m2 = talloc_realloc_size(mem_ctx, m, m_size + r_size);
/* the record lands at the previous end of the buffer */
269 memcpy(m_size + (uint8_t *)m2, r, r_size);
278 /* we've finished marshalling, return a data blob with the marshalled records */
/* Describe a marshall buffer as a TDB_DATA blob.
   m - buffer built with ctdb_marshall_add()
   Nothing is copied: dptr points at m itself and dsize is the talloc
   size of m, so the blob is only valid for as long as m is. */
279 TDB_DATA ctdb_marshall_finish(struct ctdb_marshall_buffer *m)
282 data.dptr = (uint8_t *)m;
283 data.dsize = talloc_get_size(m);
288 loop over a marshalling buffer
290 - pass r==NULL to start
291 - loop the number of times indicated by m->count
/* Step to the next record of a marshall buffer and decode it.
   m      - buffer being walked
   r      - record returned by the previous call
   header - optional output; when given, the record data is expected to
            start with a ctdb_ltdb_header, which is copied out and
            excluded from data
   key    - output: points into the record, nothing is copied
   data   - output: points into the record, nothing is copied
   No bounds check against the size of m is made in the statements below;
   the caller is expected to stop after m->count records.
   NOTE(review): data->dsize is reduced by sizeof(*header) before
   r->datalen is checked against that size, so for a short record dsize
   wraps around before the function bails out - confirm that callers
   ignore key and data on failure. */
293 struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, struct ctdb_rec_data *r,
295 struct ctdb_ltdb_header *header,
296 TDB_DATA *key, TDB_DATA *data)
/* first record of the buffer */
299 r = (struct ctdb_rec_data *)&m->data[0];
/* following record: r->length bytes past the current one */
301 r = (struct ctdb_rec_data *)(r->length + (uint8_t *)r);
/* the key occupies the first keylen bytes of the payload */
309 key->dptr = &r->data[0];
310 key->dsize = r->keylen;
/* the data part starts right after the key */
313 data->dptr = &r->data[r->keylen];
314 data->dsize = r->datalen;
315 if (header != NULL) {
/* skip the embedded header so data covers the payload only */
316 data->dptr += sizeof(*header);
317 data->dsize -= sizeof(*header);
321 if (header != NULL) {
/* record too small to hold a header */
322 if (r->datalen < sizeof(*header)) {
/* NOTE(review): struct copy through a cast pointer into the byte
   payload; this assumes the offset is suitably aligned - confirm */
325 *header = *(struct ctdb_ltdb_header *)&r->data[r->keylen];
332 if possible, make this task very high priority
/* Try to raise the scheduling priority of this process.
   ctdb - context (not referenced by the statements below)
   Failure is not fatal: it is only logged as a warning. */
334 void ctdb_high_priority(struct ctdb_context *ctdb)
/* -1 is also a legitimate new nice value, so errno is what tells an
   error apart. NOTE(review): this relies on errno having been cleared
   beforehand - confirm. */
337 if (nice(-20) == -1 && errno != 0) {
338 DEBUG(DEBUG_WARNING,("Unable to renice self: %s\n",
341 DEBUG(DEBUG_NOTICE,("Scheduler says I'm nice: %i\n",
342 getpriority(PRIO_PROCESS, getpid())));
347 make ourselves slightly nicer: eg. a ctdb child.
/* Lower the scheduling priority of this process by 10 nice levels.
   ctdb - context (not referenced by the statements below)
   Failure is not fatal: it is only logged as a warning. */
349 void ctdb_reduce_priority(struct ctdb_context *ctdb)
/* -1 can be a valid result of nice(), hence the additional errno test.
   NOTE(review): relies on errno having been cleared beforehand - confirm. */
352 if (nice(10) == -1 && errno != 0) {
353 DEBUG(DEBUG_WARNING,("Unable to lower priority: %s\n",
/*
  switch a file descriptor to non-blocking mode

  fd - descriptor to modify

  The current file status flags are read and written back with O_NONBLOCK
  added. When the flags cannot be read (for example fd is not an open
  descriptor) the descriptor is left alone; the -1 error value must not be
  used as a flag mask. The function has no way to report failure.
 */
void set_nonblocking(int fd)
{
	int flags;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags == -1) {
		return;
	}
	fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
/*
  mark a file descriptor close-on-exec

  fd - descriptor to modify

  The current descriptor flags are read and written back with FD_CLOEXEC
  added. When the flags cannot be read (for example fd is not an open
  descriptor) the descriptor is left alone; the -1 error value must not be
  used as a flag mask. The function has no way to report failure.
 */
void set_close_on_exec(int fd)
{
	int flags;

	flags = fcntl(fd, F_GETFD, 0);
	if (flags == -1) {
		return;
	}
	fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
/* Fill a sockaddr_in from dotted-quad text.
   s    - IPv4 address text, handed to inet_pton
   port - port in host byte order
   sin  - output; sin_family and sin_port are written before s is
          validated, so they are modified even when parsing fails
   Only sin_family, sin_port and sin_addr are written here; any other
   member keeps whatever the caller put there. */
373 bool parse_ipv4(const char *s, unsigned port, struct sockaddr_in *sin)
375 sin->sin_family = AF_INET;
/* stored in network byte order; htons takes a 16 bit value, so a larger
   port is silently truncated */
376 sin->sin_port = htons(port);
/* inet_pton returns 1 only for a well formed address */
378 if (inet_pton(AF_INET, s, &sin->sin_addr) != 1) {
379 DEBUG(DEBUG_ERR, (__location__ " Failed to translate %s into sin_addr\n", s));
/* Fill the ip6 member of a ctdb_sock_addr from IPv6 text.
   s      - IPv6 address text, handed to inet_pton
   ifaces - interface name used to scope a link-local address; may be NULL
   port   - port in host byte order
   saddr  - output; family, port, flowinfo and scope id are written before
            s is validated */
386 static bool parse_ipv6(const char *s, const char *ifaces, unsigned port, ctdb_sock_addr *saddr)
388 saddr->ip6.sin6_family = AF_INET6;
389 saddr->ip6.sin6_port = htons(port);
390 saddr->ip6.sin6_flowinfo = 0;
/* default: no scope; only replaced for link-local addresses below */
391 saddr->ip6.sin6_scope_id = 0;
393 if (inet_pton(AF_INET6, s, &saddr->ip6.sin6_addr) != 1) {
394 DEBUG(DEBUG_ERR, (__location__ " Failed to translate %s into sin6_addr\n", s));
/* a link-local address is only meaningful together with an interface */
398 if (ifaces && IN6_IS_ADDR_LINKLOCAL(&saddr->ip6.sin6_addr)) {
/* a comma means a list of interfaces, which cannot be expressed as a
   single scope id */
399 if (strchr(ifaces, ',')) {
400 DEBUG(DEBUG_ERR, (__location__ " Link local address %s "
401 "is specified for multiple ifaces %s\n",
/* NOTE(review): if_nametoindex reports an unknown interface as 0 and
   the result is stored unchecked, leaving the address unscoped -
   confirm that this is intended */
405 saddr->ip6.sin6_scope_id = if_nametoindex(ifaces);
413 bool parse_ip_port(const char *addr, ctdb_sock_addr *saddr)
415 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
421 s = talloc_strdup(tmp_ctx, addr);
423 DEBUG(DEBUG_ERR, (__location__ " Failed strdup()\n"));
424 talloc_free(tmp_ctx);
430 DEBUG(DEBUG_ERR, (__location__ " This addr: %s does not contain a port number\n", s));
431 talloc_free(tmp_ctx);
435 port = strtoul(p+1, &endp, 10);
436 if (endp == NULL || *endp != 0) {
437 /* trailing garbage */
438 DEBUG(DEBUG_ERR, (__location__ " Trailing garbage after the port in %s\n", s));
439 talloc_free(tmp_ctx);
445 /* now is this a ipv4 or ipv6 address ?*/
446 ret = parse_ip(s, NULL, port, saddr);
448 talloc_free(tmp_ctx);
/* Parse address text as IPv4 or IPv6 into saddr.
   addr   - address text without port or mask
   ifaces - forwarded to parse_ipv6 only (link-local scoping); unused for
            IPv4
   port   - port in host byte order, stored in the result
   saddr  - output
   NOTE(review): presumably returns the result of the parser that was
   chosen - confirm. */
455 bool parse_ip(const char *addr, const char *ifaces, unsigned port, ctdb_sock_addr *saddr)
460 /* the presence of a colon is what selects the IPv6 parser */
461 p = index(addr, ':');
463 ret = parse_ipv4(addr, port, &saddr->ip);
465 ret = parse_ipv6(addr, ifaces, port, saddr);
/* Parse address/mask text.
   str    - text holding an address followed by a numeric mask
   ifaces - forwarded to parse_ip (link-local IPv6 scoping)
   addr   - output address; parsed with port 0
   mask   - output; written as soon as the number has been converted, so
            it may be modified even when the call fails later
   A temporary talloc context holds the working copy of str and is freed
   on the exit paths shown below.
   NOTE(review): the numeric check only rejects trailing characters. An
   empty mask converts to 0 and passes, and no upper bound (32 for IPv4,
   128 for IPv6) is enforced here - confirm whether callers validate it. */
474 bool parse_ip_mask(const char *str, const char *ifaces, ctdb_sock_addr *addr, unsigned *mask)
476 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
/* private copy of the input */
482 s = talloc_strdup(tmp_ctx, str);
484 DEBUG(DEBUG_ERR, (__location__ " Failed strdup()\n"));
485 talloc_free(tmp_ctx);
491 DEBUG(DEBUG_ERR, (__location__ " This addr: %s does not contain a mask\n", s));
492 talloc_free(tmp_ctx);
/* p points at the separator, the mask digits start one past it */
496 *mask = strtoul(p+1, &endp, 10);
497 if (endp == NULL || *endp != 0) {
498 /* trailing garbage */
499 DEBUG(DEBUG_ERR, (__location__ " Trailing garbage after the mask in %s\n", s));
500 talloc_free(tmp_ctx);
506 /* the address part may be either IPv4 or IPv6; parse_ip decides */
507 ret = parse_ip(s, ifaces, 0, addr);
509 talloc_free(tmp_ctx);
514 This is used to canonicalize a ctdb_sock_addr structure.
516 void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip)
518 char prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
520 memcpy(cip, ip, sizeof (*cip));
522 if ( (ip->sa.sa_family == AF_INET6)
523 && !memcmp(&ip->ip6.sin6_addr, prefix, 12)) {
524 memset(cip, 0, sizeof(*cip));
525 #ifdef HAVE_SOCK_SIN_LEN
526 cip->ip.sin_len = sizeof(*cip);
528 cip->ip.sin_family = AF_INET;
529 cip->ip.sin_port = ip->ip6.sin6_port;
530 memcpy(&cip->ip.sin_addr, &ip->ip6.sin6_addr.s6_addr32[3], 4);
/* Compare the IP address part of two socket addresses; ports are not
   looked at.
   tip1, tip2 - addresses to compare; neither is modified
   Both inputs are canonicalized into local copies first, so the
   comparison runs on the output of ctdb_canonicalize_ip rather than on
   the raw inputs. */
534 bool ctdb_same_ip(const ctdb_sock_addr *tip1, const ctdb_sock_addr *tip2)
536 ctdb_sock_addr ip1, ip2;
538 ctdb_canonicalize_ip(tip1, &ip1);
539 ctdb_canonicalize_ip(tip2, &ip2);
/* after canonicalization differing families cannot hold the same address */
541 if (ip1.sa.sa_family != ip2.sa.sa_family) {
545 switch (ip1.sa.sa_family) {
/* IPv4: compare the 32 bit address value */
547 return ip1.ip.sin_addr.s_addr == ip2.ip.sin_addr.s_addr;
/* IPv6: bytewise comparison of the address */
549 return !memcmp(&ip1.ip6.sin6_addr.s6_addr[0],
550 &ip2.ip6.sin6_addr.s6_addr[0],
/* any other family cannot be compared and is reported */
553 DEBUG(DEBUG_ERR, (__location__ " CRITICAL Can not compare sockaddr structures of type %u\n", ip1.sa.sa_family));
561 compare two ctdb_sock_addr structures
/* True when two socket addresses have the same IP (as decided by
   ctdb_same_ip) and the same port.
   The port is read through the ip member for both inputs, whatever their
   family, and compared in its stored byte order.
   NOTE(review): this assumes sin_port and sin6_port occupy the same
   position inside ctdb_sock_addr - confirm against the type definition. */
563 bool ctdb_same_sockaddr(const ctdb_sock_addr *ip1, const ctdb_sock_addr *ip2)
565 return ctdb_same_ip(ip1, ip2) && ip1->ip.sin_port == ip2->ip.sin_port;
/* Format the IP address of addr as text.
   addr - address to format; only the address is printed, not the port
   The text is written into a single function-static buffer, so each call
   overwrites the result of the previous one: copy the string before
   calling again, and do not use this from concurrent threads.
   NOTE(review): presumably returns that buffer; for an unknown family
   nothing is written, so the buffer would still hold the previous
   result - confirm. */
568 char *ctdb_addr_to_str(ctdb_sock_addr *addr)
570 static char cip[128] = "";
572 switch (addr->sa.sa_family) {
/* the result of inet_ntop is not checked in either branch */
574 inet_ntop(addr->ip.sin_family, &addr->ip.sin_addr, cip, sizeof(cip));
577 inet_ntop(addr->ip6.sin6_family, &addr->ip6.sin6_addr, cip, sizeof(cip));
580 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
/* Return the port of addr in host byte order.
   addr - address to inspect; the member that is read depends on the
          address family
   NOTE(review): the value returned for an unknown family is not
   determined by the statements below - confirm (an error is logged). */
586 unsigned ctdb_addr_to_port(ctdb_sock_addr *addr)
588 switch (addr->sa.sa_family) {
590 return ntohs(addr->ip.sin_port);
593 return ntohs(addr->ip6.sin6_port);
596 DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
/* Add signum to the set of blocked signals via sigprocmask.
   signum - signal number to block
   The previous mask is not saved (third argument NULL) and the results
   of the calls are not checked.
   NOTE(review): set is presumably emptied before signum is added -
   confirm. */
602 void ctdb_block_signal(int signum)
606 sigaddset(&set,signum);
607 sigprocmask(SIG_BLOCK,&set,NULL);
/* Remove signum from the set of blocked signals via sigprocmask.
   signum - signal number to unblock
   The previous mask is not saved (third argument NULL) and the results
   of the calls are not checked.
   NOTE(review): set is presumably emptied before signum is added -
   confirm. */
610 void ctdb_unblock_signal(int signum)
614 sigaddset(&set,signum);
615 sigprocmask(SIG_UNBLOCK,&set,NULL);
/* Table pairing each debug level constant with its printable name.
   get_debug_by_level and get_debug_by_desc scan it until they reach an
   entry whose description is NULL, so the table has to end with such an
   entry. */
618 struct debug_levels debug_levels[] = {
619 {DEBUG_EMERG, "EMERG"},
620 {DEBUG_ALERT, "ALERT"},
621 {DEBUG_CRIT, "CRIT"},
623 {DEBUG_WARNING, "WARNING"},
624 {DEBUG_NOTICE, "NOTICE"},
625 {DEBUG_INFO, "INFO"},
626 {DEBUG_DEBUG, "DEBUG"},
/* Translate a numeric debug level into its name.
   level - value to look up in debug_levels
   Returns the description of the first table entry with that level.
   NOTE(review): what is returned when no entry matches is not determined
   by the statements below - confirm. */
630 const char *get_debug_by_level(int32_t level)
/* the table is terminated by an entry whose description is NULL */
634 for (i=0; debug_levels[i].description != NULL; i++) {
635 if (debug_levels[i].level == level) {
636 return debug_levels[i].description;
/* Translate a debug level name into its numeric value.
   desc - name to look up; compared with strcmp, so the match is case
          sensitive and desc must not be NULL
   Returns the level of the first table entry with that description.
   NOTE(review): what is returned when no entry matches is not determined
   by the statements below - confirm. */
642 int32_t get_debug_by_desc(const char *desc)
/* the table is terminated by an entry whose description is NULL */
646 for (i=0; debug_levels[i].description != NULL; i++) {
647 if (!strcmp(debug_levels[i].description, desc)) {
648 return debug_levels[i].level;
655 /* we don't lock future pages here; it would increase the chance that
656 * we'd fail to mmap later on. */
/* Lock the memory of the process that is mapped right now.
   ctdb - context; its valgrinding flag is consulted before any locking
   Only MCL_CURRENT is requested, in line with the remark above about not
   locking future pages. A failing mlockall is only logged as a warning,
   and the result of the preceding mlock call is not checked. */
657 void ctdb_lockdown_memory(struct ctdb_context *ctdb)
660 /* Extra stack, please! */
/* writing to the buffer makes sure its pages are actually touched */
662 memset(dummy, 0, sizeof(dummy));
/* NOTE(review): presumably returns without locking anything when this
   flag is set - confirm */
664 if (ctdb->valgrinding) {
668 /* Avoid compiler optimizing out dummy. */
669 mlock(dummy, sizeof(dummy));
670 if (mlockall(MCL_CURRENT) != 0) {
671 DEBUG(DEBUG_WARNING,("Failed to lock memory: %s'\n",
/* Table of event script call names.
   NOTE(review): presumably indexed by an event script call enumeration
   declared elsewhere, in which case the order of the entries has to match
   that enumeration - confirm before adding or reordering entries. */
677 const char *ctdb_eventscript_call_names[] = {
694 setup the local socket name
/* Record the name of the local socket on the context.
   ctdb       - context to update; also the talloc parent of the copy
   socketname - name to copy
   The string is duplicated, so the caller keeps ownership of socketname.
   A name stored earlier in ctdb->daemon.name is not freed here. */
696 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
698 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
/* NOTE(review): CTDB_NO_MEMORY presumably returns an error from this
   function when the copy failed - confirm in ctdb_private.h */
699 CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);