4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 /* for talloc_append_string() */
22 #define TALLOC_DEPRECATED 1
26 #include "lib/tdb/include/tdb.h"
27 #include "lib/util/dlinklist.h"
28 #include "lib/tevent/tevent.h"
29 #include "system/network.h"
30 #include "system/filesys.h"
31 #include "system/locale.h"
33 #include "../include/ctdb_private.h"
34 #include "lib/util/dlinklist.h"
39 allocate a packet for use in client<->daemon communication
/*
 * Allocate and initialise a packet header for client<->daemon traffic.
 *
 * The requested length is raised to at least slength, and the allocation
 * size is padded with the usual (x + (A-1)) & ~(A-1) round-up against
 * CTDB_DS_ALIGNMENT (correct only if that constant is a power of two -
 * TODO confirm). The buffer is talloc'ed under mem_ctx, given the talloc
 * name `type`, its first slength bytes are zeroed and the common header
 * fields are then filled in from `ctdb`.
 *
 * NOTE(review): the declarations of mem_ctx, type and size, the NULL
 * check that guards the DEBUG message, and the return statements are on
 * lines not visible in this extract.
 */
41 struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
43 enum ctdb_operation operation,
44 size_t length, size_t slength,
48 struct ctdb_req_header *hdr;
50 length = MAX(length, slength);
51 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
53 hdr = (struct ctdb_req_header *)talloc_size(mem_ctx, size);
55 DEBUG(DEBUG_ERR,("Unable to allocate packet for operation %u of length %u\n",
56 operation, (unsigned)length));
/* label the allocation so it is identifiable in talloc reports */
59 talloc_set_name_const(hdr, type);
/* only the first slength bytes are cleared, not the whole buffer */
60 memset(hdr, 0, slength);
62 hdr->operation = operation;
63 hdr->ctdb_magic = CTDB_MAGIC;
64 hdr->ctdb_version = CTDB_VERSION;
65 hdr->srcnode = ctdb->pnn;
67 hdr->generation = ctdb->vnn_map->generation;
74 local version of ctdb_call
/*
 * Run a registered call function against a record on the local node.
 *
 * A ctdb_call_info is allocated under ctdb and given a private copy of
 * *data. The handler whose id equals call->call_id is searched for in
 * ctdb_db->calls and invoked; header->laccessor is set to `caller`;
 * c->new_data is written back with ctdb_ltdb_store(); finally the reply
 * data (re-parented onto `call`) and the status are copied into `call`.
 *
 * NOTE(review): several branch conditions, return statements and any
 * cleanup of `c` are on lines not visible in this extract, so the exact
 * error paths cannot be confirmed from here.
 */
76 int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
77 struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
78 TDB_DATA *data, uint32_t caller)
80 struct ctdb_call_info *c;
81 struct ctdb_registered_call *fn;
82 struct ctdb_context *ctdb = ctdb_db->ctdb;
84 c = talloc(ctdb, struct ctdb_call_info);
85 CTDB_NO_MEMORY(ctdb, c);
88 c->call_data = &call->call_data;
/* the handler works on a copy owned by c, not on the caller's buffer */
89 c->record_data.dptr = talloc_memdup(c, data->dptr, data->dsize);
90 c->record_data.dsize = data->dsize;
91 CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
/* linear search of the registered calls for a matching id */
96 for (fn=ctdb_db->calls;fn;fn=fn->next) {
97 if (fn->id == call->call_id) break;
100 ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
/* a non-zero result from the handler is reported as a failed call */
105 if (fn->fn(c) != 0) {
106 ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
111 if (header->laccessor != caller) {
114 header->laccessor = caller;
117 /* we need to force the record to be written out if this was a remote access,
118 so that the lacount is updated */
119 if (c->new_data == NULL && header->laccessor != ctdb->pnn) {
120 c->new_data = &c->record_data;
124 /* XXX check that we always have the lock here? */
125 if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
126 ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
133 call->reply_data = *c->reply_data;
/* move ownership of the reply buffer from c to the caller's call */
135 talloc_steal(call, call->reply_data.dptr);
136 talloc_set_name_const(call->reply_data.dptr, __location__);
138 call->reply_data.dptr = NULL;
139 call->reply_data.dsize = 0;
141 call->status = c->status;
150 queue a packet for sending from client to daemon
/*
 * Queue a packet on the client's daemon connection.
 *
 * Hands hdr->length bytes starting at hdr to ctdb_queue_send() on
 * ctdb->daemon.queue and returns that function's result unchanged.
 */
152 static int ctdb_client_queue_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
154 return ctdb_queue_send(ctdb->daemon.queue, (uint8_t *)hdr, hdr->length);
159 called when a CTDB_REPLY_CALL packet comes in in the client
161 This packet comes in response to a CTDB_REQ_CALL request packet. It
162 contains any reply data from the call
/*
 * Handle a CTDB_REPLY_CALL packet on the client side.
 *
 * The pending ctdb_client_call_state is looked up by hdr->reqid; a
 * missing or mismatching entry is only logged. Otherwise the reply data
 * pointer/length and status are recorded in state->call, the packet is
 * re-parented onto `state` (reply_data.dptr points into it), the state
 * is marked CTDB_CALL_DONE and the optional async callback is invoked.
 *
 * NOTE(review): the NULL check on `state` and the early returns are on
 * lines not visible in this extract.
 */
164 static void ctdb_client_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
166 struct ctdb_reply_call *c = (struct ctdb_reply_call *)hdr;
167 struct ctdb_client_call_state *state;
169 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_call_state);
171 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
175 if (hdr->reqid != state->reqid) {
176 /* we found a record but it was the wrong one */
177 DEBUG(DEBUG_ERR, ("Dropped client call reply with reqid:%u\n",hdr->reqid));
/* reply_data aliases memory inside the packet c; nothing is copied here */
181 state->call->reply_data.dptr = c->data;
182 state->call->reply_data.dsize = c->datalen;
183 state->call->status = c->status;
/* keep the packet alive for as long as state exists */
185 talloc_steal(state, c);
187 state->state = CTDB_CALL_DONE;
189 if (state->async.fn) {
190 state->async.fn(state);
/* forward declaration: the handler is defined further down in this file
   but is already needed by ctdb_client_read_cb() */
194 static void ctdb_client_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
197 this is called in the client, when data comes in from the daemon
/*
 * Read callback for data arriving from the daemon.
 *
 * `args` is the ctdb_context and `data`/`cnt` the received packet. The
 * packet is parked under a temporary talloc context, then validated in
 * order: minimum size, cnt == hdr->length, CTDB_MAGIC, CTDB_VERSION.
 * Valid packets are dispatched on hdr->operation to the reply-call,
 * request-message or reply-control handler; anything else is logged as
 * a bogus operation code. The temporary context is freed at the end.
 *
 * NOTE(review): the condition that triggers the "Daemon has exited"
 * message, the declaration of tmp_ctx and the jumps/returns taken after
 * each failed check are on lines not visible in this extract.
 */
199 static void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
201 struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
202 struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
205 /* place the packet as a child of a tmp_ctx. We then use
206 talloc_free() below to free it. If any of the calls want
207 to keep it, then they will steal it somewhere else, and the
208 talloc_free() will be a no-op */
209 tmp_ctx = talloc_new(ctdb);
210 talloc_steal(tmp_ctx, hdr);
213 DEBUG(DEBUG_INFO,("Daemon has exited - shutting down client\n"));
/* the header fields must not be read before this size check passes */
217 if (cnt < sizeof(*hdr)) {
218 DEBUG(DEBUG_CRIT,("Bad packet length %u in client\n", (unsigned)cnt));
221 if (cnt != hdr->length) {
222 ctdb_set_error(ctdb, "Bad header length %u expected %u in client\n",
223 (unsigned)hdr->length, (unsigned)cnt);
227 if (hdr->ctdb_magic != CTDB_MAGIC) {
228 ctdb_set_error(ctdb, "Non CTDB packet rejected in client\n");
232 if (hdr->ctdb_version != CTDB_VERSION) {
233 ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected in client\n", hdr->ctdb_version);
237 switch (hdr->operation) {
238 case CTDB_REPLY_CALL:
239 ctdb_client_reply_call(ctdb, hdr);
242 case CTDB_REQ_MESSAGE:
243 ctdb_request_message(ctdb, hdr);
246 case CTDB_REPLY_CONTROL:
247 ctdb_client_reply_control(ctdb, hdr);
251 DEBUG(DEBUG_CRIT,("bogus operation code:%u\n",hdr->operation));
255 talloc_free(tmp_ctx);
259 connect to a unix domain socket
/*
 * Connect the client to the daemon's unix domain socket.
 *
 * Builds an AF_UNIX address from ctdb->daemon.name, creates a stream
 * socket stored in ctdb->daemon.sd, marks it non-blocking and
 * close-on-exec, connects it and finally sets up ctdb->daemon.queue
 * with ctdb_client_read_cb() as the read callback. On connect failure
 * the socket is closed and daemon.sd reset to -1.
 *
 * NOTE(review): strncpy() is given the full sizeof(addr.sun_path), so a
 * daemon.name at least that long leaves sun_path without a terminating
 * NUL (the memset only helps for shorter names).
 * NOTE(review): in the connect failure branch errno is read after
 * close(), which may have overwritten the value set by connect().
 * NOTE(review): the return statements are not visible in this extract.
 */
261 int ctdb_socket_connect(struct ctdb_context *ctdb)
263 struct sockaddr_un addr;
265 memset(&addr, 0, sizeof(addr));
266 addr.sun_family = AF_UNIX;
267 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
269 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
270 if (ctdb->daemon.sd == -1) {
271 DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
275 set_nonblocking(ctdb->daemon.sd);
276 set_close_on_exec(ctdb->daemon.sd);
278 if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
279 close(ctdb->daemon.sd);
280 ctdb->daemon.sd = -1;
281 DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
285 ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
287 ctdb_client_read_cb, ctdb, "to-ctdbd");
/*
 * Handle for a record obtained through ctdb_fetch_lock(): it remembers
 * the owning database and the record's ltdb header.
 *
 * NOTE(review): other code in this file also uses h->key, so at least
 * one further member is declared on lines not visible in this extract.
 */
292 struct ctdb_record_handle {
293 struct ctdb_db_context *ctdb_db;
296 struct ctdb_ltdb_header header;
301 make a recv call to the local ctdb daemon - called from client context
303 This is called when the program wants to wait for a ctdb_call to complete and get the
304 results. This call will block unless the call has already completed.
/*
 * Wait for a call started with ctdb_call_send() and collect its result.
 *
 * Runs the event loop one iteration at a time until state->state
 * reaches CTDB_CALL_DONE or beyond; any final state other than
 * CTDB_CALL_DONE is logged as a failure. On success the reply data is
 * duplicated with state->ctdb_db as talloc parent (or set to NULL/0 when
 * empty) and the status is copied into `call`.
 *
 * NOTE(review): the talloc_memdup() result is not checked on the lines
 * visible here. Argument validation, the freeing of `state` and the
 * return statements are on lines not visible in this extract.
 */
306 int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
312 while (state->state < CTDB_CALL_DONE) {
313 event_loop_once(state->ctdb_db->ctdb->ev);
315 if (state->state != CTDB_CALL_DONE) {
316 DEBUG(DEBUG_ERR,(__location__ " ctdb_call_recv failed\n"));
321 if (state->call->reply_data.dsize) {
322 call->reply_data.dptr = talloc_memdup(state->ctdb_db,
323 state->call->reply_data.dptr,
324 state->call->reply_data.dsize);
325 call->reply_data.dsize = state->call->reply_data.dsize;
327 call->reply_data.dptr = NULL;
328 call->reply_data.dsize = 0;
330 call->status = state->call->status;
340 destroy a ctdb_call in client
/*
 * talloc destructor for a client call state: drops the state's reqid
 * from the context's reqid table so that a late reply can no longer be
 * matched to it.
 */
342 static int ctdb_client_call_destructor(struct ctdb_client_call_state *state)
344 ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
349 construct an event driven local ctdb_call
351 this is used so that locally processed ctdb_call requests are processed
352 in an event driven manner
/*
 * Build an already-completed call state for a call that can be served
 * from the local database.
 *
 * The state (child of ctdb_db) and its embedded ctdb_call are allocated,
 * data->dptr is re-parented onto the state, the state is marked
 * CTDB_CALL_DONE, *call is copied in and ctdb_call_local() is run with
 * this node's pnn as the caller.
 *
 * NOTE(review): the `data` parameter, the declaration of `ret`, and what
 * is done with `ret` and returned are on lines not visible in this
 * extract.
 */
354 static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db_context *ctdb_db,
355 struct ctdb_call *call,
356 struct ctdb_ltdb_header *header,
359 struct ctdb_client_call_state *state;
360 struct ctdb_context *ctdb = ctdb_db->ctdb;
363 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
364 CTDB_NO_MEMORY_NULL(ctdb, state);
365 state->call = talloc_zero(state, struct ctdb_call);
366 CTDB_NO_MEMORY_NULL(ctdb, state->call);
368 talloc_steal(state, data->dptr);
370 state->state = CTDB_CALL_DONE;
371 *(state->call) = *call;
372 state->ctdb_db = ctdb_db;
374 ret = ctdb_call_local(ctdb_db, state->call, header, state, data, ctdb->pnn);
380 make a ctdb call to the local daemon - async send. Called from client context.
382 This constructs a ctdb_call request and queues it for processing.
383 This call never blocks.
/*
 * Start a ctdb call from client context without waiting for the result.
 *
 * The daemon socket is opened on demand. Under the record's chain lock
 * the local copy is fetched; if the fetch succeeds and this node is the
 * dmaster, the call is served locally via ctdb_client_call_local_send().
 * Otherwise a CTDB_REQ_CALL packet is allocated as a child of a new call
 * state, filled with the key followed by the call data, and queued to
 * the daemon with the state left in CTDB_CALL_WAIT.
 *
 * NOTE(review): the results of ctdb_socket_connect() and
 * ctdb_client_queue_pkt() are not used on the lines visible here.
 * NOTE(review): the NULL checks guarding some DEBUG messages and all
 * return statements are on lines not visible in this extract.
 */
385 struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
386 struct ctdb_call *call)
388 struct ctdb_client_call_state *state;
389 struct ctdb_context *ctdb = ctdb_db->ctdb;
390 struct ctdb_ltdb_header header;
394 struct ctdb_req_call *c;
396 /* if the domain socket is not yet open, open it */
397 if (ctdb->daemon.sd==-1) {
398 ctdb_socket_connect(ctdb);
401 ret = ctdb_ltdb_lock(ctdb_db, call->key);
403 DEBUG(DEBUG_ERR,(__location__ " Failed to get chainlock\n"));
407 ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
/* fast path: we hold the record, so no round trip to the daemon */
409 if (ret == 0 && header.dmaster == ctdb->pnn) {
410 state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
411 talloc_free(data.dptr);
412 ctdb_ltdb_unlock(ctdb_db, call->key);
/* remote path: the local copy is not needed any further */
416 ctdb_ltdb_unlock(ctdb_db, call->key);
417 talloc_free(data.dptr);
419 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
421 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state\n"));
424 state->call = talloc_zero(state, struct ctdb_call);
425 if (state->call == NULL) {
426 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state->call\n"));
/* packet payload is the key immediately followed by the call data */
430 len = offsetof(struct ctdb_req_call, data) + call->key.dsize + call->call_data.dsize;
431 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CALL, len, struct ctdb_req_call);
433 DEBUG(DEBUG_ERR, (__location__ " failed to allocate packet\n"));
437 state->reqid = ctdb_reqid_new(ctdb, state);
438 state->ctdb_db = ctdb_db;
439 talloc_set_destructor(state, ctdb_client_call_destructor);
441 c->hdr.reqid = state->reqid;
442 c->flags = call->flags;
443 c->db_id = ctdb_db->db_id;
444 c->callid = call->call_id;
446 c->keylen = call->key.dsize;
447 c->calldatalen = call->call_data.dsize;
448 memcpy(&c->data[0], call->key.dptr, call->key.dsize);
449 memcpy(&c->data[call->key.dsize],
450 call->call_data.dptr, call->call_data.dsize);
/* the saved call points at the copies inside the packet, so it does not
   depend on the caller's key/call_data buffers staying valid */
451 *(state->call) = *call;
452 state->call->call_data.dptr = &c->data[call->key.dsize];
453 state->call->key.dptr = &c->data[0];
455 state->state = CTDB_CALL_WAIT;
458 ctdb_client_queue_pkt(ctdb, &c->hdr);
465 full ctdb_call. Equivalent to a ctdb_call_send() followed by a ctdb_call_recv()
/*
 * Synchronous ctdb call: ctdb_call_send() followed directly by
 * ctdb_call_recv(), whose result is returned.
 *
 * The state returned by ctdb_call_send() is passed on without a check
 * here; whether ctdb_call_recv() tolerates a NULL state cannot be seen
 * from this extract.
 */
467 int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
469 struct ctdb_client_call_state *state;
471 state = ctdb_call_send(ctdb_db, call);
472 return ctdb_call_recv(state, call);
477 tell the daemon what messaging srvid we will use, and register the message
478 handler function in the client
/*
 * Register a message handler for `srvid`.
 *
 * First asks the daemon on the current node to register the srvid
 * (CTDB_CONTROL_REGISTER_SRVID, no timeout); a transport error or a
 * non-zero control status is logged. Then the handler is registered in
 * this client's own ctdb context and that result is returned.
 *
 * NOTE(review): the private_data parameter, the local declarations and
 * the failure return are on lines not visible in this extract.
 */
480 int ctdb_client_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
481 ctdb_msg_fn_t handler,
488 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_REGISTER_SRVID, 0,
489 tdb_null, NULL, NULL, &status, NULL, NULL);
490 if (res != 0 || status != 0) {
491 DEBUG(DEBUG_ERR,("Failed to register srvid %llu\n", (unsigned long long)srvid));
495 /* also need to register the handler with our own ctdb structure */
496 return ctdb_register_message_handler(ctdb, ctdb, srvid, handler, private_data);
500 tell the daemon we no longer want a srvid
/*
 * Stop receiving messages for `srvid`.
 *
 * Asks the daemon on the current node to deregister the srvid
 * (CTDB_CONTROL_DEREGISTER_SRVID, no timeout); a transport error or a
 * non-zero control status is logged. The handler identified by
 * srvid/private_data is then removed from this client's ctdb context.
 *
 * NOTE(review): the local declarations and the return statements are on
 * lines not visible in this extract.
 */
502 int ctdb_client_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
507 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_DEREGISTER_SRVID, 0,
508 tdb_null, NULL, NULL, &status, NULL, NULL);
509 if (res != 0 || status != 0) {
510 DEBUG(DEBUG_ERR,("Failed to deregister srvid %llu\n", (unsigned long long)srvid));
514 /* also need to deregister the handler from our own ctdb structure */
515 ctdb_deregister_message_handler(ctdb, srvid, private_data);
521 send a message - from client context
/*
 * Send a message carrying `data` to node `pnn` from client context.
 *
 * A CTDB_REQ_MESSAGE packet large enough for the payload is allocated
 * with ctdb as talloc parent, the destination node and payload are
 * filled in, and the packet is queued to the daemon.
 *
 * NOTE(review): where srvid is stored in the packet, whether the packet
 * is freed after queuing, and the return statement are on lines not
 * visible in this extract.
 */
523 int ctdb_client_send_message(struct ctdb_context *ctdb, uint32_t pnn,
524 uint64_t srvid, TDB_DATA data)
526 struct ctdb_req_message *r;
529 len = offsetof(struct ctdb_req_message, data) + data.dsize;
530 r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
531 len, struct ctdb_req_message);
532 CTDB_NO_MEMORY(ctdb, r);
534 r->hdr.destnode = pnn;
536 r->datalen = data.dsize;
537 memcpy(&r->data[0], data.dptr, data.dsize);
539 res = ctdb_client_queue_pkt(ctdb, &r->hdr);
550 cancel a ctdb_fetch_lock operation, releasing the lock
/*
 * talloc destructor for a record handle: releases the ltdb lock on
 * h->key in h->ctdb_db, so freeing the handle gives up the lock taken
 * by ctdb_fetch_lock().
 */
552 static int fetch_lock_destructor(struct ctdb_record_handle *h)
554 ctdb_ltdb_unlock(h->ctdb_db, h->key);
559 force the migration of a record to this node
/*
 * Ask for a record to be migrated to this node by issuing a synchronous
 * ctdb_call() of CTDB_NULL_FUNC with the CTDB_IMMEDIATE_MIGRATION flag.
 * Returns the ctdb_call() result.
 *
 * NOTE(review): `call` is a stack variable; any zeroing of it and the
 * assignment of call.key are on lines not visible in this extract.
 */
561 static int ctdb_client_force_migration(struct ctdb_db_context *ctdb_db, TDB_DATA key)
563 struct ctdb_call call;
565 call.call_id = CTDB_NULL_FUNC;
567 call.flags = CTDB_IMMEDIATE_MIGRATION;
568 return ctdb_call(ctdb_db, &call);
572 get a lock on a record, and return the record's data. Blocks until it gets the lock
/*
 * Lock a record and fetch its data, migrating it here first if needed.
 *
 * A handle is allocated under mem_ctx with a private copy of the key.
 * The chain lock is taken, fetch_lock_destructor() is installed so that
 * freeing the handle unlocks the record, and the local copy is fetched
 * into h->header / *data. If the fetch fails or this node is not the
 * dmaster, the lock is dropped and ctdb_client_force_migration() is
 * called; per the procedure comment below the sequence then restarts
 * from the locking step.
 *
 * NOTE(review): the retry jump, the failure cleanup, the second half of
 * the torture-mode condition and the return statements are on lines not
 * visible in this extract.
 */
574 struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
575 TDB_DATA key, TDB_DATA *data)
578 struct ctdb_record_handle *h;
581 procedure is as follows:
583 1) get the chain lock.
584 2) check if we are dmaster
585 3) if we are the dmaster then return handle
586 4) if not dmaster then ask ctdb daemon to make us dmaster, and wait for
588 5) when we get the reply, goto (1)
591 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
596 h->ctdb_db = ctdb_db;
598 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
599 if (h->key.dptr == NULL) {
605 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: key=%*.*s\n", (int)key.dsize, (int)key.dsize,
606 (const char *)key.dptr));
609 /* step 1 - get the chain lock */
610 ret = ctdb_ltdb_lock(ctdb_db, key);
612 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
617 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: got chain lock\n"));
/* from here on, freeing h releases the chain lock */
619 talloc_set_destructor(h, fetch_lock_destructor);
621 ret = ctdb_ltdb_fetch(ctdb_db, key, &h->header, h, data);
623 /* when torturing, ensure we test the remote path */
624 if ((ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
626 h->header.dmaster = (uint32_t)-1;
630 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: done local fetch\n"));
632 if (ret != 0 || h->header.dmaster != ctdb_db->ctdb->pnn) {
633 ctdb_ltdb_unlock(ctdb_db, key);
634 ret = ctdb_client_force_migration(ctdb_db, key);
636 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
643 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: we are dmaster - done\n"));
648 store some data to the record that was locked with ctdb_fetch_lock()
/*
 * Store new data for a record held through a ctdb_fetch_lock() handle.
 *
 * Persistent databases are rejected with an error message; otherwise
 * the data is written with ctdb_ltdb_store() under the handle's key and
 * header, and that result is returned.
 *
 * NOTE(review): the value returned in the persistent case is on a line
 * not visible in this extract.
 */
650 int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
652 if (h->ctdb_db->persistent) {
653 DEBUG(DEBUG_ERR, (__location__ " ctdb_record_store prohibited for persistent dbs\n"));
657 return ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
661 non-locking fetch of a record
/*
 * Fetch a record without taking a lock on it.
 *
 * Issues a synchronous ctdb_call() of CTDB_FETCH_FUNC with empty call
 * data, hands the reply back through *data and re-parents the reply
 * buffer onto mem_ctx so that the caller owns it.
 *
 * NOTE(review): the assignment of call.key, the handling of `ret` and
 * the return statements are on lines not visible in this extract.
 */
663 int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
664 TDB_DATA key, TDB_DATA *data)
666 struct ctdb_call call;
669 call.call_id = CTDB_FETCH_FUNC;
670 call.call_data.dptr = NULL;
671 call.call_data.dsize = 0;
673 ret = ctdb_call(ctdb_db, &call);
676 *data = call.reply_data;
677 talloc_steal(mem_ctx, data->dptr);
686 called when a control completes or times out, to invoke the callback
687 function the user provided
/*
 * Timed-event handler scheduled once a control has completed or timed
 * out, so that the user's callback is run.
 *
 * private_data is the ctdb_client_control_state. The state is moved
 * under a fresh top-level talloc context, ctdb_control_recv() is called
 * on it (that function is what invokes state->async.fn), and the
 * temporary context is then freed.
 *
 * NOTE(review): the remaining ctdb_control_recv() arguments and the
 * handling of `ret` are on lines not visible in this extract.
 */
689 static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
690 struct timeval t, void *private_data)
692 struct ctdb_client_control_state *state;
693 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
696 state = talloc_get_type(private_data, struct ctdb_client_control_state);
697 talloc_steal(tmp_ctx, state);
699 ret = ctdb_control_recv(state->ctdb, state, state,
704 talloc_free(tmp_ctx);
708 called when a CTDB_REPLY_CONTROL packet comes in in the client
710 This packet comes in response to a CTDB_REQ_CONTROL request packet. It
711 contains any reply data from the control
/*
 * Handle a CTDB_REPLY_CONTROL packet on the client side.
 *
 * The pending ctdb_client_control_state is looked up by hdr->reqid; a
 * missing or mismatching entry is only logged. Otherwise outdata is
 * pointed at the payload inside the packet, the status is recorded, an
 * error string is duplicated from the bytes that follow the payload,
 * the packet is re-parented onto `state`, and the state is marked
 * CTDB_CONTROL_DONE. If an async callback is set, a zero-delay timed
 * event is added to run invoke_control_callback() from the event loop.
 *
 * NOTE(review): the NULL check on `state`, the early returns and the
 * length argument / guard of the talloc_strndup() call are on lines not
 * visible in this extract.
 */
713 static void ctdb_client_reply_control(struct ctdb_context *ctdb,
714 struct ctdb_req_header *hdr)
716 struct ctdb_reply_control *c = (struct ctdb_reply_control *)hdr;
717 struct ctdb_client_control_state *state;
719 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_control_state);
721 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
725 if (hdr->reqid != state->reqid) {
726 /* we found a record but it was the wrong one */
727 DEBUG(DEBUG_ERR, ("Dropped orphaned reply control with reqid:%u\n",hdr->reqid));
731 state->outdata.dptr = c->data;
732 state->outdata.dsize = c->datalen;
733 state->status = c->status;
735 state->errormsg = talloc_strndup(state,
736 (char *)&c->data[c->datalen],
740 /* state->outdata now uses resources from c so we dont want c
741 to just dissappear from under us while state is still alive
743 talloc_steal(state, c);
745 state->state = CTDB_CONTROL_DONE;
747 /* if we had a callback registered for this control, pull the response
748 and call the callback.
750 if (state->async.fn) {
751 event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
757 destroy a ctdb_control in client
/*
 * talloc destructor for a client control state: drops the state's reqid
 * from the context's reqid table so that a late reply can no longer be
 * matched to it.
 */
759 static int ctdb_control_destructor(struct ctdb_client_control_state *state)
761 ctdb_reqid_remove(state->ctdb, state->reqid);
766 /* time out handler for ctdb_control */
/*
 * Timed-event handler fired when a control's timeout expires.
 *
 * private_data is the ctdb_client_control_state. The timeout is logged
 * with the request id, opcode and destination node, the state is set to
 * CTDB_CONTROL_TIMEOUT, and if an async callback is registered a
 * zero-delay timed event is added to run invoke_control_callback().
 */
767 static void control_timeout_func(struct event_context *ev, struct timed_event *te,
768 struct timeval t, void *private_data)
770 struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
772 DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
773 "dstnode:%u\n", state->reqid, state->c->opcode,
774 state->c->hdr.destnode));
776 state->state = CTDB_CONTROL_TIMEOUT;
778 /* if we had a callback registered for this control, pull the response
779 and call the callback.
781 if (state->async.fn) {
782 event_add_timed(state->ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
786 /* async version of send control request */
/*
 * Start a control request without waiting for the reply.
 *
 * The daemon socket is opened on demand. A control state is allocated
 * under mem_ctx, given a fresh reqid and a destructor that releases it,
 * and a CTDB_REQ_CONTROL packet (child of the state) is filled with the
 * destination node and payload. When a non-zero timeout is supplied, a
 * timed event is armed that runs control_timeout_func(). The packet is
 * then queued to the daemon.
 *
 * NOTE(review): the mem_ctx and errormsg parameters, the copying of
 * opcode/srvid/flags into the packet, the handling of a queueing
 * failure and of CTDB_CTRL_FLAG_NOREPLY, and the return statements are
 * on lines not visible in this extract. The result of
 * ctdb_socket_connect() is not used on the line visible here.
 */
787 struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
788 uint32_t destnode, uint64_t srvid,
789 uint32_t opcode, uint32_t flags, TDB_DATA data,
791 struct timeval *timeout,
794 struct ctdb_client_control_state *state;
796 struct ctdb_req_control *c;
803 /* if the domain socket is not yet open, open it */
804 if (ctdb->daemon.sd==-1) {
805 ctdb_socket_connect(ctdb);
808 state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
809 CTDB_NO_MEMORY_NULL(ctdb, state);
812 state->reqid = ctdb_reqid_new(ctdb, state);
813 state->state = CTDB_CONTROL_WAIT;
814 state->errormsg = NULL;
/* the destructor gives the reqid back when the state is freed */
816 talloc_set_destructor(state, ctdb_control_destructor);
818 len = offsetof(struct ctdb_req_control, data) + data.dsize;
819 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
820 len, struct ctdb_req_control);
822 CTDB_NO_MEMORY_NULL(ctdb, c);
823 c->hdr.reqid = state->reqid;
824 c->hdr.destnode = destnode;
829 c->datalen = data.dsize;
831 memcpy(&c->data[0], data.dptr, data.dsize);
/* a NULL or all-zero timeout means no timeout event is armed */
835 if (timeout && !timeval_is_zero(timeout)) {
836 event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
839 ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
845 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
854 /* async version of receive control reply */
/*
 * Wait for a control started with ctdb_control_send() and collect its
 * reply.
 *
 * `state` is moved under a temporary talloc context that is freed on
 * every exit path visible here, so the state does not outlive this
 * call. The event loop is run one iteration at a time while the state
 * is CTDB_CONTROL_WAIT. Three outcomes are visible:
 *  - final state is not CTDB_CONTROL_DONE: logged as a failure;
 *  - the reply carried an error message: it is logged and moved to the
 *    caller under mem_ctx;
 *  - otherwise the reply data is duplicated under mem_ctx into *outdata
 *    and the status is copied to *status.
 * In each case state->async.fn, when set, is invoked before the
 * temporary context is freed.
 *
 * NOTE(review): the mem_ctx parameter, the initial handling of the
 * optional output pointers, the NULL guards around the output
 * assignments and the return statements are on lines not visible in
 * this extract. The talloc_memdup() result is not checked on the lines
 * visible here.
 */
855 int ctdb_control_recv(struct ctdb_context *ctdb,
856 struct ctdb_client_control_state *state,
858 TDB_DATA *outdata, int32_t *status, char **errormsg)
862 if (status != NULL) {
865 if (errormsg != NULL) {
873 /* prevent double free of state */
874 tmp_ctx = talloc_new(ctdb);
875 talloc_steal(tmp_ctx, state);
877 /* loop one event at a time until we either timeout or the control
880 while (state->state == CTDB_CONTROL_WAIT) {
881 event_loop_once(ctdb->ev);
884 if (state->state != CTDB_CONTROL_DONE) {
885 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_recv failed\n"));
886 if (state->async.fn) {
887 state->async.fn(state);
889 talloc_free(tmp_ctx);
893 if (state->errormsg) {
894 DEBUG(DEBUG_ERR,("ctdb_control error: '%s'\n", state->errormsg));
896 (*errormsg) = talloc_move(mem_ctx, &state->errormsg);
898 if (state->async.fn) {
899 state->async.fn(state);
901 talloc_free(tmp_ctx);
906 *outdata = state->outdata;
907 outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
911 *status = state->status;
914 if (state->async.fn) {
915 state->async.fn(state);
918 talloc_free(tmp_ctx);
925 send a ctdb control message
926 timeout specifies how long we should wait for a reply.
927 if timeout is NULL we wait indefinitely
/*
 * Synchronous control: ctdb_control_send() followed directly by
 * ctdb_control_recv(), whose result is returned. Reply data and any
 * error message are allocated under mem_ctx by the recv step.
 *
 * NOTE(review): the errormsg parameter and the trailing arguments of
 * both calls are on lines not visible in this extract.
 */
929 int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
930 uint32_t opcode, uint32_t flags, TDB_DATA data,
931 TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
932 struct timeval *timeout,
935 struct ctdb_client_control_state *state;
937 state = ctdb_control_send(ctdb, destnode, srvid, opcode,
938 flags, data, mem_ctx,
940 return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
948 a process exists call. Returns 0 if process exists, -1 otherwise
/*
 * Ask node `destnode` whether process `pid` exists, using
 * CTDB_CONTROL_PROCESS_EXISTS with no timeout. The pid is sent as the
 * raw bytes of the local pid_t.
 *
 * NOTE(review): the local declarations, the check that guards the DEBUG
 * message and the value finally returned are on lines not visible in
 * this extract.
 */
950 int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid)
956 data.dptr = (uint8_t*)&pid;
957 data.dsize = sizeof(pid);
959 ret = ctdb_control(ctdb, destnode, 0,
960 CTDB_CONTROL_PROCESS_EXISTS, 0, data,
961 NULL, NULL, &status, NULL, NULL);
963 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for process_exists failed\n"));
971 get remote statistics
/*
 * Fetch the statistics block of node `destnode` into *status.
 *
 * Sends CTDB_CONTROL_STATISTICS with no timeout (reply allocated under
 * ctdb), insists that the reply is exactly sizeof(struct
 * ctdb_statistics) bytes, copies it out by value and frees the reply
 * buffer.
 *
 * NOTE(review): the local declarations and return statements are on
 * lines not visible in this extract; whether the reply buffer is also
 * freed on the size-mismatch path cannot be seen from here.
 */
973 int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_statistics *status)
979 ret = ctdb_control(ctdb, destnode, 0,
980 CTDB_CONTROL_STATISTICS, 0, tdb_null,
981 ctdb, &data, &res, NULL, NULL);
982 if (ret != 0 || res != 0) {
983 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for statistics failed\n"));
987 if (data.dsize != sizeof(struct ctdb_statistics)) {
988 DEBUG(DEBUG_ERR,(__location__ " Wrong statistics size %u - expected %u\n",
989 (unsigned)data.dsize, (unsigned)sizeof(struct ctdb_statistics)));
993 *status = *(struct ctdb_statistics *)data.dptr;
994 talloc_free(data.dptr);
1000 shutdown a remote ctdb node
/*
 * Send CTDB_CONTROL_SHUTDOWN to node `destnode`.
 *
 * Only the send half is used on the lines visible here: the control is
 * issued with ctdb_control_send() under a NULL talloc context and the
 * only failure detected is a NULL state.
 *
 * NOTE(review): the return statements are on lines not visible in this
 * extract.
 */
1002 int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1004 struct ctdb_client_control_state *state;
1006 state = ctdb_control_send(ctdb, destnode, 0,
1007 CTDB_CONTROL_SHUTDOWN, 0, tdb_null,
1008 NULL, &timeout, NULL);
1009 if (state == NULL) {
1010 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for shutdown failed\n"));
1018 get vnn map from a remote node
/*
 * Fetch the vnn map of node `destnode`.
 *
 * The CTDB_CONTROL_GETVNNMAP reply is treated as a ctdb_vnn_map_wire.
 * Its size must cover the fixed header and equal header + map->size
 * uint32_t entries. A ctdb_vnn_map is then allocated under mem_ctx, the
 * generation, size and entries are copied in, and the reply buffer is
 * freed.
 *
 * NOTE(review): CTDB_NO_MEMORY presumably returns on failure; if so the
 * reply buffer stays attached to mem_ctx on those paths - confirm.
 * NOTE(review): the local declarations and return statements are on
 * lines not visible in this extract.
 */
1020 int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap)
1025 struct ctdb_vnn_map_wire *map;
1027 ret = ctdb_control(ctdb, destnode, 0,
1028 CTDB_CONTROL_GETVNNMAP, 0, tdb_null,
1029 mem_ctx, &outdata, &res, &timeout, NULL);
1030 if (ret != 0 || res != 0) {
1031 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getvnnmap failed\n"));
1035 map = (struct ctdb_vnn_map_wire *)outdata.dptr;
/* the first test must pass before map->size is read by the second */
1036 if (outdata.dsize < offsetof(struct ctdb_vnn_map_wire, map) ||
1037 outdata.dsize != map->size*sizeof(uint32_t) + offsetof(struct ctdb_vnn_map_wire, map)) {
1038 DEBUG(DEBUG_ERR,("Bad vnn map size received in ctdb_ctrl_getvnnmap\n"));
1042 (*vnnmap) = talloc(mem_ctx, struct ctdb_vnn_map);
1043 CTDB_NO_MEMORY(ctdb, *vnnmap);
1044 (*vnnmap)->generation = map->generation;
1045 (*vnnmap)->size = map->size;
1046 (*vnnmap)->map = talloc_array(*vnnmap, uint32_t, map->size);
1048 CTDB_NO_MEMORY(ctdb, (*vnnmap)->map);
1049 memcpy((*vnnmap)->map, map->map, sizeof(uint32_t)*map->size);
1050 talloc_free(outdata.dptr);
1057 get the recovery mode of a remote node
/*
 * Async send half of "get recovery mode": issues
 * CTDB_CONTROL_GET_RECMODE to `destnode` with the given timeout and
 * returns the control state (allocated under mem_ctx by
 * ctdb_control_send()).
 */
1059 struct ctdb_client_control_state *
1060 ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
1062 return ctdb_control_send(ctdb, destnode, 0,
1063 CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
1064 mem_ctx, &timeout, NULL);
/*
 * Async receive half of "get recovery mode": waits for the control and
 * stores its status value, cast to uint32_t, in *recmode. The recovery
 * mode travels in the control's status field, not in reply data.
 *
 * NOTE(review): the check that guards the DEBUG message, any NULL guard
 * on recmode and the return statements are on lines not visible in this
 * extract.
 */
1067 int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
1072 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1074 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
1079 *recmode = (uint32_t)res;
/*
 * Synchronous "get recovery mode": the _send half followed directly by
 * the _recv half, whose result is returned.
 */
1085 int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
1087 struct ctdb_client_control_state *state;
1089 state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode);
1090 return ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, state, recmode);
1097 set the recovery mode of a remote node
/*
 * Set the recovery mode of node `destnode`.
 *
 * Sends CTDB_CONTROL_SET_RECMODE with the 4-byte recmode value as
 * payload; a transport error or non-zero control status is logged.
 *
 * NOTE(review): the local declarations and return statements are on
 * lines not visible in this extract.
 */
1099 int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
1105 data.dsize = sizeof(uint32_t);
1106 data.dptr = (unsigned char *)&recmode;
1108 ret = ctdb_control(ctdb, destnode, 0,
1109 CTDB_CONTROL_SET_RECMODE, 0, data,
1110 NULL, NULL, &res, &timeout, NULL);
1111 if (ret != 0 || res != 0) {
1112 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmode failed\n"));
1122 get the recovery master of a remote node
/*
 * Async send half of "get recovery master": issues
 * CTDB_CONTROL_GET_RECMASTER to `destnode` with the given timeout and
 * returns the control state (allocated under mem_ctx by
 * ctdb_control_send()).
 */
1124 struct ctdb_client_control_state *
1125 ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
1126 struct timeval timeout, uint32_t destnode)
1128 return ctdb_control_send(ctdb, destnode, 0,
1129 CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
1130 mem_ctx, &timeout, NULL);
/*
 * Async receive half of "get recovery master": waits for the control
 * and stores its status value, cast to uint32_t, in *recmaster. The
 * recovery master travels in the control's status field, not in reply
 * data.
 *
 * NOTE(review): the check that guards the DEBUG message, any NULL guard
 * on recmaster and the return statements are on lines not visible in
 * this extract.
 */
1133 int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
1138 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1140 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
1145 *recmaster = (uint32_t)res;
/*
 * Synchronous "get recovery master": the _send half followed directly
 * by the _recv half, whose result is returned.
 */
1151 int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
1153 struct ctdb_client_control_state *state;
1155 state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode);
1156 return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx, state, recmaster);
1161 set the recovery master of a remote node
/*
 * Set the recovery master of node `destnode`.
 *
 * Sends CTDB_CONTROL_SET_RECMASTER with the 4-byte recmaster value as
 * payload; a transport error or non-zero control status is logged.
 *
 * NOTE(review): the local declarations and return statements are on
 * lines not visible in this extract.
 */
1163 int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
1170 data.dsize = sizeof(uint32_t);
1171 data.dptr = (unsigned char *)&recmaster;
1173 ret = ctdb_control(ctdb, destnode, 0,
1174 CTDB_CONTROL_SET_RECMASTER, 0, data,
1175 NULL, NULL, &res, &timeout, NULL);
1176 if (ret != 0 || res != 0) {
1177 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmaster failed\n"));
1186 get a list of databases off a remote node
/*
 * Fetch the list of databases known to node `destnode`.
 *
 * The CTDB_CONTROL_GET_DBMAP reply is duplicated under mem_ctx into
 * *dbmap and the original reply buffer is freed.
 *
 * NOTE(review): the talloc_memdup() result is not checked, and the
 * reply size is not validated against the ctdb_dbid_map layout, on the
 * lines visible here. The local declarations and return statements are
 * on lines not visible in this extract.
 */
1188 int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1189 TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
1195 ret = ctdb_control(ctdb, destnode, 0,
1196 CTDB_CONTROL_GET_DBMAP, 0, tdb_null,
1197 mem_ctx, &outdata, &res, &timeout, NULL);
1198 if (ret != 0 || res != 0) {
1199 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res));
1203 *dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1204 talloc_free(outdata.dptr);
1210 get a list of nodes (vnn and flags ) from a remote node
/*
 * Fetch the node map (node numbers, flags, addresses) of `destnode`.
 *
 * Sends CTDB_CONTROL_GET_NODEMAP. If the transport succeeded but the
 * control status is -1 with an empty reply, the request is retried
 * through ctdb_ctrl_getnodemapv4(). Any other failure or an empty reply
 * is logged. On success the reply is duplicated under mem_ctx into
 * *nodemap and the original reply buffer is freed.
 *
 * NOTE(review): the talloc_memdup() result is not checked on the lines
 * visible here. The local declarations and return statements are on
 * lines not visible in this extract.
 */
1212 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
1213 struct timeval timeout, uint32_t destnode,
1214 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1220 ret = ctdb_control(ctdb, destnode, 0,
1221 CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
1222 mem_ctx, &outdata, &res, &timeout, NULL);
/* per the log text below, this result pattern is taken to mean the peer
   does not support the newer control */
1223 if (ret == 0 && res == -1 && outdata.dsize == 0) {
1224 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed, falling back to ipv4-only control\n"));
1225 return ctdb_ctrl_getnodemapv4(ctdb, timeout, destnode, mem_ctx, nodemap);
1227 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1228 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
1232 *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1233 talloc_free(outdata.dptr);
1239 old style ipv4-only get a list of nodes (vnn and flags ) from a remote node
/*
 * Fetch the node map through the older ipv4-only control and convert
 * it to the current ctdb_node_map layout.
 *
 * The CTDB_CONTROL_GET_NODEMAPv4 reply is read as a ctdb_node_mapv4. A
 * zeroed ctdb_node_map with the same number of entries is allocated
 * under mem_ctx, and pnn, flags and the ipv4 address of each entry are
 * copied over with the address family set to AF_INET. The reply buffer
 * is then freed.
 *
 * NOTE(review): on the lines visible here nodemapv4->num is used
 * without being checked against outdata.dsize, so a short reply could
 * be read past its end - confirm against the full source.
 * NOTE(review): the local declarations and return statements are on
 * lines not visible in this extract.
 */
1241 int ctdb_ctrl_getnodemapv4(struct ctdb_context *ctdb,
1242 struct timeval timeout, uint32_t destnode,
1243 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1247 struct ctdb_node_mapv4 *nodemapv4;
1250 ret = ctdb_control(ctdb, destnode, 0,
1251 CTDB_CONTROL_GET_NODEMAPv4, 0, tdb_null,
1252 mem_ctx, &outdata, &res, &timeout, NULL);
1253 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1254 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodesv4 failed ret:%d res:%d\n", ret, res));
1258 nodemapv4 = (struct ctdb_node_mapv4 *)outdata.dptr;
1260 len = offsetof(struct ctdb_node_map, nodes) + nodemapv4->num*sizeof(struct ctdb_node_and_flags);
1261 (*nodemap) = talloc_zero_size(mem_ctx, len);
1262 CTDB_NO_MEMORY(ctdb, (*nodemap));
1264 (*nodemap)->num = nodemapv4->num;
1265 for (i=0; i<nodemapv4->num; i++) {
1266 (*nodemap)->nodes[i].pnn = nodemapv4->nodes[i].pnn;
1267 (*nodemap)->nodes[i].flags = nodemapv4->nodes[i].flags;
1268 (*nodemap)->nodes[i].addr.ip = nodemapv4->nodes[i].sin;
1269 (*nodemap)->nodes[i].addr.sa.sa_family = AF_INET;
1272 talloc_free(outdata.dptr);
1278 drop the transport, reload the nodes file and restart the transport
/*
 * Ask node `destnode` to reload its nodes file by sending
 * CTDB_CONTROL_RELOAD_NODES_FILE with no payload; a transport error or
 * non-zero control status is logged.
 *
 * NOTE(review): the local declarations and return statements are on
 * lines not visible in this extract.
 */
1280 int ctdb_ctrl_reload_nodes_file(struct ctdb_context *ctdb,
1281 struct timeval timeout, uint32_t destnode)
1286 ret = ctdb_control(ctdb, destnode, 0,
1287 CTDB_CONTROL_RELOAD_NODES_FILE, 0, tdb_null,
1288 NULL, NULL, &res, &timeout, NULL);
1289 if (ret != 0 || res != 0) {
1290 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reloadnodesfile failed\n"));
1299 set vnn map on a node
/*
 * Push a vnn map to node `destnode`.
 *
 * The in-memory ctdb_vnn_map is serialised into a ctdb_vnn_map_wire
 * buffer allocated under mem_ctx (generation, size, then the uint32_t
 * entries) and sent with CTDB_CONTROL_SETVNNMAP; a transport error or
 * non-zero control status is logged.
 *
 * NOTE(review): the assignment of data.dsize, the freeing of the wire
 * buffer and the return statements are on lines not visible in this
 * extract.
 */
1301 int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1302 TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
1307 struct ctdb_vnn_map_wire *map;
1310 len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*vnnmap->size;
1311 map = talloc_size(mem_ctx, len);
1312 CTDB_NO_MEMORY(ctdb, map);
1314 map->generation = vnnmap->generation;
1315 map->size = vnnmap->size;
1316 memcpy(map->map, vnnmap->map, sizeof(uint32_t)*map->size);
1319 data.dptr = (uint8_t *)map;
1321 ret = ctdb_control(ctdb, destnode, 0,
1322 CTDB_CONTROL_SETVNNMAP, 0, data,
1323 NULL, NULL, &res, &timeout, NULL);
1324 if (ret != 0 || res != 0) {
1325 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setvnnmap failed\n"));
1336 async send for pull database
/*
 * Async send half of "pull database".
 *
 * A ctdb_control_pulldb request is allocated under mem_ctx, filled in
 * and sent to `destnode` as the payload of CTDB_CONTROL_PULL_DB with
 * the given timeout.
 *
 * NOTE(review): the assignment of pull->db_id, the freeing of `pull`
 * and the return statement are on lines not visible in this extract.
 */
1338 struct ctdb_client_control_state *ctdb_ctrl_pulldb_send(
1339 struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1340 uint32_t lmaster, TALLOC_CTX *mem_ctx, struct timeval timeout)
1343 struct ctdb_control_pulldb *pull;
1344 struct ctdb_client_control_state *state;
1346 pull = talloc(mem_ctx, struct ctdb_control_pulldb);
1347 CTDB_NO_MEMORY_NULL(ctdb, pull);
1350 pull->lmaster = lmaster;
1352 indata.dsize = sizeof(struct ctdb_control_pulldb);
1353 indata.dptr = (unsigned char *)pull;
1355 state = ctdb_control_send(ctdb, destnode, 0,
1356 CTDB_CONTROL_PULL_DB, 0, indata,
1357 mem_ctx, &timeout, NULL);
1364 async recv for pull database
/*
 * Async receive half of "pull database": waits for the control and
 * hands the reply back through outdata (allocated under mem_ctx by
 * ctdb_control_recv()). A transport error or non-zero control status is
 * logged.
 *
 * NOTE(review): the outdata parameter, the local declarations and the
 * return statements are on lines not visible in this extract.
 */
1366 int ctdb_ctrl_pulldb_recv(
1367 struct ctdb_context *ctdb,
1368 TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state,
1374 ret = ctdb_control_recv(ctdb, state, mem_ctx, outdata, &res, NULL);
1375 if ( (ret != 0) || (res != 0) ){
1376 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_pulldb_recv failed\n"));
1384 pull all keys and records for a specific database on a node
/*
 * Synchronous pull: issue ctdb_ctrl_pulldb_send() and then wait for the
 * reply with ctdb_ctrl_pulldb_recv(), whose result is returned.
 * NOTE(review): no NULL check on state is visible between the two calls.
 */
1386 int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode,
1387 uint32_t dbid, uint32_t lmaster,
1388 TALLOC_CTX *mem_ctx, struct timeval timeout,
1391 struct ctdb_client_control_state *state;
1393 state = ctdb_ctrl_pulldb_send(ctdb, destnode, dbid, lmaster, mem_ctx,
1396 return ctdb_ctrl_pulldb_recv(ctdb, mem_ctx, state, outdata);
1401 change dmaster for all keys in the database to the new value
1403 int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1404 TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
1410 indata.dsize = 2*sizeof(uint32_t);
1411 indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
1413 ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
1414 ((uint32_t *)(&indata.dptr[0]))[1] = dmaster;
1416 ret = ctdb_control(ctdb, destnode, 0,
1417 CTDB_CONTROL_SET_DMASTER, 0, indata,
1418 NULL, NULL, &res, &timeout, NULL);
1419 if (ret != 0 || res != 0) {
1420 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setdmaster failed\n"));
1428 ping a node, return number of clients connected
/*
 * Send CTDB_CONTROL_PING with an empty payload (tdb_null) to destnode.
 * No timeout is supplied (the timeout argument is NULL); the remote status
 * is collected in res.
 * NOTE(review): the return statement is not visible in this listing.
 */
1430 int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode)
1435 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PING, 0,
1436 tdb_null, NULL, NULL, &res, NULL, NULL);
1444 find the real path to a ltdb
/*
 * Ask destnode for the path of database dbid (CTDB_CONTROL_GETDBPATH).
 *
 * The dbid is sent as the input payload and the same TDB_DATA variable is
 * reused for the reply, which is allocated on mem_ctx. On success the reply
 * is copied into a NUL-terminated string on mem_ctx stored in *path and the
 * raw reply buffer is freed.
 */
1446 int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1453 data.dptr = (uint8_t *)&dbid;
1454 data.dsize = sizeof(dbid);
1456 ret = ctdb_control(ctdb, destnode, 0,
1457 CTDB_CONTROL_GETDBPATH, 0, data,
1458 mem_ctx, &data, &res, &timeout, NULL);
1459 if (ret != 0 || res != 0) {
/* talloc_strndup bounds the copy by data.dsize, so the reply need not be
 * NUL-terminated */
1463 (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
/* NOTE(review): the reply buffer does not appear to be released on this
 * failure path - confirm */
1464 if ((*path) == NULL) {
1468 talloc_free(data.dptr);
1474 find the name of a db
/*
 * Ask destnode for the name of database dbid (CTDB_CONTROL_GET_DBNAME).
 *
 * The dbid is sent as the input payload and the same TDB_DATA variable is
 * reused for the reply, which is allocated on mem_ctx. On success the reply
 * is copied into a NUL-terminated string on mem_ctx stored in *name and the
 * raw reply buffer is freed.
 */
1476 int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1483 data.dptr = (uint8_t *)&dbid;
1484 data.dsize = sizeof(dbid);
1486 ret = ctdb_control(ctdb, destnode, 0,
1487 CTDB_CONTROL_GET_DBNAME, 0, data,
1488 mem_ctx, &data, &res, &timeout, NULL);
1489 if (ret != 0 || res != 0) {
/* copy is bounded by data.dsize, so the reply need not be NUL-terminated */
1493 (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
/* NOTE(review): the reply buffer does not appear to be released on this
 * failure path - confirm */
1494 if ((*name) == NULL) {
1498 talloc_free(data.dptr);
1504 get the health status of a db
/*
 * Fetch the health status string of database dbid from destnode
 * (CTDB_CONTROL_DB_GET_HEALTH).
 *
 * An empty reply (dsize == 0) is handled separately from a non-empty one;
 * a non-empty reply is copied into *reason as a string on mem_ctx and the
 * raw reply buffer is freed.
 * NOTE(review): what *reason is set to for an empty reply is not visible
 * here - presumably NULL, meaning "healthy"; confirm.
 */
1506 int ctdb_ctrl_getdbhealth(struct ctdb_context *ctdb,
1507 struct timeval timeout,
1509 uint32_t dbid, TALLOC_CTX *mem_ctx,
1510 const char **reason)
1516 data.dptr = (uint8_t *)&dbid;
1517 data.dsize = sizeof(dbid);
1519 ret = ctdb_control(ctdb, destnode, 0,
1520 CTDB_CONTROL_DB_GET_HEALTH, 0, data,
1521 mem_ctx, &data, &res, &timeout, NULL);
1522 if (ret != 0 || res != 0) {
1526 if (data.dsize == 0) {
1531 (*reason) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1532 if ((*reason) == NULL) {
1536 talloc_free(data.dptr);
/*
 * Ask destnode to attach the database called name. The name, including its
 * terminating NUL, is the input payload; persistent selects
 * CTDB_CONTROL_DB_ATTACH_PERSISTENT over CTDB_CONTROL_DB_ATTACH.
 * The reply is received into data, allocated on mem_ctx.
 * NOTE(review): the visible lines do not show the reply being used or freed.
 */
1544 int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1545 TALLOC_CTX *mem_ctx, const char *name, bool persistent)
1551 data.dptr = discard_const(name);
1552 data.dsize = strlen(name)+1;
1554 ret = ctdb_control(ctdb, destnode, 0,
1555 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1557 mem_ctx, &data, &res, &timeout, NULL);
1559 if (ret != 0 || res != 0) {
1567 get debug level on a node
1569 int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level)
1575 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, tdb_null,
1576 ctdb, &data, &res, NULL, NULL);
1577 if (ret != 0 || res != 0) {
1580 if (data.dsize != sizeof(int32_t)) {
1581 DEBUG(DEBUG_ERR,("Bad control reply size in ctdb_get_debuglevel (got %u)\n",
1582 (unsigned)data.dsize));
1585 *level = *(int32_t *)data.dptr;
1586 talloc_free(data.dptr);
1591 set debug level on a node
/*
 * Send the new debug level (a single int32_t) to destnode with
 * CTDB_CONTROL_SET_DEBUG. Neither a timeout nor reply data is requested;
 * only the remote status is collected in res.
 */
1593 int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t level)
1599 data.dptr = (uint8_t *)&level;
1600 data.dsize = sizeof(level);
1602 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DEBUG, 0, data,
1603 NULL, NULL, &res, NULL, NULL);
1604 if (ret != 0 || res != 0) {
1612 get a list of connected nodes
/*
 * Build an array (allocated on mem_ctx) holding the pnn of every node whose
 * flags do not include NODE_FLAGS_DISCONNECTED, based on the node map
 * fetched from CTDB_CURRENT_NODE. *num_nodes is used as the write index
 * into the array.
 * NOTE(review): the initialisation and increment of *num_nodes, the error
 * handling after ctdb_ctrl_getnodemap() and the return value are not
 * visible in this listing.
 */
1614 uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
1615 struct timeval timeout,
1616 TALLOC_CTX *mem_ctx,
1617 uint32_t *num_nodes)
1619 struct ctdb_node_map *map=NULL;
1625 ret = ctdb_ctrl_getnodemap(ctdb, timeout, CTDB_CURRENT_NODE, mem_ctx, &map);
/* sized for the worst case: one slot per node in the map */
1630 nodes = talloc_array(mem_ctx, uint32_t, map->num);
1631 if (nodes == NULL) {
1635 for (i=0;i<map->num;i++) {
1636 if (!(map->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
1637 nodes[*num_nodes] = map->nodes[i].pnn;
/*
 * Send CTDB_CONTROL_STATISTICS_RESET with an empty payload to destnode.
 * No timeout is supplied. An error is logged when ctdb_control() fails or
 * the remote status (res) is non-zero.
 */
1649 int ctdb_statistics_reset(struct ctdb_context *ctdb, uint32_t destnode)
1654 ret = ctdb_control(ctdb, destnode, 0,
1655 CTDB_CONTROL_STATISTICS_RESET, 0, tdb_null,
1656 NULL, NULL, &res, NULL, NULL);
1657 if (ret != 0 || res != 0) {
1658 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reset statistics failed\n"));
1665 this is the dummy null procedure that all databases support
/*
 * Call handler registered by ctdb_attach() under CTDB_NULL_FUNC.
 * NOTE(review): the body is not visible in this listing.
 */
1667 static int ctdb_null_func(struct ctdb_call_info *call)
1673 this is a plain fetch procedure that all databases support
/*
 * Call handler registered by ctdb_attach() under CTDB_FETCH_FUNC.
 * It points the reply at the record's own data, so the record content is
 * what the caller gets back.
 * NOTE(review): the return statement is not visible in this listing.
 */
1675 static int ctdb_fetch_func(struct ctdb_call_info *call)
1677 call->reply_data = &call->record_data;
1682 attach to a specific database - client call
/*
 * Client-side attach to the database called name.
 *
 * Steps visible here:
 *  - look for an existing handle with ctdb_db_handle()
 *  - allocate a zeroed ctdb_db_context on ctdb and record a copy of the name
 *  - ask the local daemon (CTDB_CURRENT_NODE) to attach; the reply must be
 *    exactly one uint32_t, which becomes db_id
 *  - ask the daemon for the database path (2 second timeout) and open the
 *    tdb at that path with tdb_wrap_open()
 *  - link the context into ctdb->db_list and register the two well known
 *    call functions
 *
 * The attach, getdbpath and tdb-open failure paths free ctdb_db.
 * NOTE(review): what happens when ctdb_db_handle() finds an existing handle,
 * and the return statements, are not visible in this listing.
 */
1684 struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name, bool persistent, uint32_t tdb_flags)
1686 struct ctdb_db_context *ctdb_db;
1691 ctdb_db = ctdb_db_handle(ctdb, name);
1696 ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
1697 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db);
1699 ctdb_db->ctdb = ctdb;
1700 ctdb_db->db_name = talloc_strdup(ctdb_db, name);
/* NOTE(review): ctdb_db does not appear to be freed if this check fails */
1701 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name);
/* payload is the database name including its terminating NUL */
1703 data.dptr = discard_const(name);
1704 data.dsize = strlen(name)+1;
1706 /* tell ctdb daemon to attach */
/* the caller's tdb_flags is passed as the third ctdb_control() argument,
 * where most other callers in this file pass 0 */
1707 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, tdb_flags,
1708 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1709 0, data, ctdb_db, &data, &res, NULL, NULL);
1710 if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) {
1711 DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name));
1712 talloc_free(ctdb_db);
1716 ctdb_db->db_id = *(uint32_t *)data.dptr;
1717 talloc_free(data.dptr);
1719 ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(2, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
1721 DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name));
1722 talloc_free(ctdb_db);
/* from here on the caller's tdb_flags value is discarded: the flags used to
 * open the local tdb are derived from persistent and ctdb->valgrinding */
1726 tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
1727 if (ctdb->valgrinding) {
1728 tdb_flags |= TDB_NOMMAP;
1730 tdb_flags |= TDB_DISALLOW_NESTING;
1732 ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, tdb_flags, O_RDWR, 0);
1733 if (ctdb_db->ltdb == NULL) {
1734 ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path);
1735 talloc_free(ctdb_db);
1739 ctdb_db->persistent = persistent;
1741 DLIST_ADD(ctdb->db_list, ctdb_db);
1743 /* add well known functions */
1744 ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC);
1745 ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC);
1752 setup a call for a database
/*
 * Register call function fn under id for ctdb_db.
 *
 * Visible here: a CTDB_CONTROL_SET_CALL control carrying a
 * struct ctdb_control_set_call is sent to the local daemon, and a
 * struct ctdb_registered_call is allocated on ctdb_db and added to
 * ctdb_db->calls.
 * NOTE(review): several lines are missing from this listing, including the
 * filling-in of c and call and possibly preprocessor conditionals around
 * the daemon control; the in-code remark below suggests the control part
 * may be disabled - confirm against the full file.
 */
1754 int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id)
1756 struct ctdb_registered_call *call;
1761 struct ctdb_control_set_call c;
1764 /* this is no longer valid with the separate daemon architecture */
1765 c.db_id = ctdb_db->db_id;
1769 data.dptr = (uint8_t *)&c;
1770 data.dsize = sizeof(c);
1772 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_SET_CALL, 0,
1773 data, NULL, NULL, &status, NULL, NULL);
1774 if (ret != 0 || status != 0) {
1775 DEBUG(DEBUG_ERR,("ctdb_set_call failed for call %u\n", id));
1780 /* also register locally */
1781 call = talloc(ctdb_db, struct ctdb_registered_call);
1785 DLIST_ADD(ctdb_db->calls, call);
/*
 * State shared between ctdb_traverse() and traverse_handler().
 * NOTE(review): only the fn member is visible in this listing; the users
 * also reference done and private_data members.
 */
1790 struct traverse_state {
/* per-record callback supplied by the caller of ctdb_traverse() */
1793 ctdb_traverse_func fn;
1798 called on each key during a ctdb_traverse
/*
 * Message handler invoked for each record delivered during a traverse.
 *
 * The payload is interpreted as a struct ctdb_rec_data: d->length must
 * equal the message size, the key is the first keylen bytes of d->data and
 * the record data is the datalen bytes that follow. The user callback
 * state->fn is invoked with that key and data.
 */
1800 static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *p)
1802 struct traverse_state *state = (struct traverse_state *)p;
1803 struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
/* NOTE(review): only sizeof(uint32_t) bytes are required before d->length
 * is read, and keylen/datalen are not checked against data.dsize in the
 * visible lines */
1806 if (data.dsize < sizeof(uint32_t) ||
1807 d->length != data.dsize) {
1808 DEBUG(DEBUG_ERR,("Bad data size %u in traverse_handler\n", (unsigned)data.dsize));
/* split the packed record into key and data views; nothing is copied */
1813 key.dsize = d->keylen;
1814 key.dptr = &d->data[0];
1815 data.dsize = d->datalen;
1816 data.dptr = &d->data[d->keylen];
1818 if (key.dsize == 0 && data.dsize == 0) {
1819 /* end of traverse */
/* a record consisting of nothing but the ltdb header carries no payload */
1824 if (data.dsize == sizeof(struct ctdb_ltdb_header)) {
1825 /* empty records are deleted records in ctdb */
1829 if (state->fn(ctdb, key, data, state->private_data) != 0) {
1838 start a cluster wide traverse, calling the supplied fn on each record
1839 return the number of records traversed, or -1 on error
/*
 * Run a traverse of ctdb_db, calling fn for each record received.
 *
 * A message handler (traverse_handler) is installed for a pid-derived srvid,
 * CTDB_CONTROL_TRAVERSE_START is sent to the local daemon, and the event
 * loop is run until state.done becomes true. The handler is removed again
 * both on control failure and after completion.
 * NOTE(review): the initialisation of state.done/state.fn, the remaining
 * fields of t and the return value are not visible in this listing.
 */
1841 int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *private_data)
1844 struct ctdb_traverse_start t;
/* srvid is the process id with bits 60-63 set */
1847 uint64_t srvid = (getpid() | 0xFLL<<60);
1848 struct traverse_state state;
1852 state.private_data = private_data;
1855 ret = ctdb_client_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
1857 DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
1861 t.db_id = ctdb_db->db_id;
1865 data.dptr = (uint8_t *)&t;
1866 data.dsize = sizeof(t);
1868 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_TRAVERSE_START, 0,
1869 data, NULL, NULL, &status, NULL, NULL);
1870 if (ret != 0 || status != 0) {
1871 DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
1872 ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
/* state lives on this stack frame, so the loop must not be left while the
 * handler is still registered */
1876 while (!state.done) {
1877 event_loop_once(ctdb_db->ctdb->ev);
1880 ret = ctdb_client_remove_message_handler(ctdb_db->ctdb, srvid, &state);
1882 DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
/*
 * True for byte values 32..127. The argument is fully parenthesised so that
 * expression arguments (for example "c & 0x7f") are evaluated as a unit
 * before the comparisons. The argument is evaluated twice, so it must not
 * have side effects.
 */
#define ISASCII(x) (((x) > 31) && ((x) < 128))
1891 called on each key during a catdb
/*
 * Traverse callback that prints one record to the FILE passed in p.
 *
 * Output consists of the key, then the dmaster and rsn fields read from the
 * struct ctdb_ltdb_header at the start of data, then the payload that
 * follows the header. Bytes for which ISASCII() is true are printed as-is,
 * all others as a backslash followed by two hex digits.
 * NOTE(review): data.dsize is not checked against sizeof(*h) in the visible
 * lines before the header is read.
 */
1893 int ctdb_dumpdb_record(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
1896 FILE *f = (FILE *)p;
1897 struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
1899 fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
1900 for (i=0;i<key.dsize;i++) {
1901 if (ISASCII(key.dptr[i])) {
1902 fprintf(f, "%c", key.dptr[i]);
1904 fprintf(f, "\\%02X", key.dptr[i]);
1909 fprintf(f, "dmaster: %u\n", h->dmaster);
1910 fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
/* payload length excludes the ltdb header */
1912 fprintf(f, "data(%u) = \"", (unsigned)(data.dsize - sizeof(*h)));
/* start right after the header */
1913 for (i=sizeof(*h);i<data.dsize;i++) {
1914 if (ISASCII(data.dptr[i])) {
1915 fprintf(f, "%c", data.dptr[i]);
1917 fprintf(f, "\\%02X", data.dptr[i]);
1928 convenience function to list all keys to stdout
1930 int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
1932 return ctdb_traverse(ctdb_db, ctdb_dumpdb_record, f);
1936 get the pid of a ctdb daemon
/*
 * Send CTDB_CONTROL_GET_PID with an empty payload to destnode.
 * No reply data is requested; only the status word res is collected.
 * NOTE(review): the condition guarding the error message and the store
 * into *pid are not visible in this listing - presumably the pid travels
 * in res; confirm.
 */
1938 int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
1943 ret = ctdb_control(ctdb, destnode, 0,
1944 CTDB_CONTROL_GET_PID, 0, tdb_null,
1945 NULL, NULL, &res, &timeout, NULL);
1947 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpid failed\n"));
1958 async freeze send control
1960 struct ctdb_client_control_state *
1961 ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
1963 return ctdb_control_send(ctdb, destnode, priority,
1964 CTDB_CONTROL_FREEZE, 0, tdb_null,
1965 mem_ctx, &timeout, NULL);
1969 async freeze recv control
/*
 * Completion half of an asynchronous freeze: wait for the reply to state
 * with ctdb_control_recv(). No reply data is requested. An error is logged
 * when the receive fails or the remote status (res) is non-zero.
 */
1971 int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state)
1976 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1977 if ( (ret != 0) || (res != 0) ){
1978 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_freeze_recv failed\n"));
1986 freeze databases of a certain priority
/*
 * Synchronously freeze the databases of one priority on destnode: send the
 * freeze control and wait for its reply. The control state is created on a
 * temporary talloc context that is freed before returning.
 * NOTE(review): no NULL check on tmp_ctx or state is visible.
 */
1988 int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
1990 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1991 struct ctdb_client_control_state *state;
1994 state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
1995 ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
1996 talloc_free(tmp_ctx);
2001 /* Freeze all databases */
/*
 * Freeze every database priority on destnode by calling
 * ctdb_ctrl_freeze_priority() for priorities 1 through NUM_DB_PRIORITIES.
 * NOTE(review): the handling of a failing priority is not visible here -
 * presumably the loop stops and an error is returned; confirm.
 */
2002 int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2006 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
2007 if (ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, i) != 0) {
2015 thaw databases of a certain priority
/*
 * Send CTDB_CONTROL_THAW with an empty payload to destnode. The priority is
 * passed as the third ctdb_control() argument. An error is logged when
 * ctdb_control() fails or the remote status (res) is non-zero.
 */
2017 int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
2022 ret = ctdb_control(ctdb, destnode, priority,
2023 CTDB_CONTROL_THAW, 0, tdb_null,
2024 NULL, NULL, &res, &timeout, NULL);
2025 if (ret != 0 || res != 0) {
2026 DEBUG(DEBUG_ERR,(__location__ " ctdb_control thaw failed\n"));
/*
  thaw all databases

  Delegates to ctdb_ctrl_thaw_priority() with priority 0 and returns its
  result unchanged.
 */
int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
{
	const uint32_t all_priorities = 0;
	int rc;

	rc = ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, all_priorities);

	return rc;
}
2040 get pnn of a node, or -1
/*
 * Send CTDB_CONTROL_GET_PNN with an empty payload to destnode. No reply
 * data is requested; only the status word res is collected.
 * NOTE(review): the condition guarding the error message and the return
 * value are not visible in this listing - presumably res is returned as the
 * pnn; confirm.
 */
2042 int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2047 ret = ctdb_control(ctdb, destnode, 0,
2048 CTDB_CONTROL_GET_PNN, 0, tdb_null,
2049 NULL, NULL, &res, &timeout, NULL);
2051 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpnn failed\n"));
2059 get the monitoring mode of a remote node
/*
 * Send CTDB_CONTROL_GET_MONMODE with an empty payload to destnode. No reply
 * data is requested; only the status word res is collected.
 * NOTE(review): the condition guarding the error message and the store
 * into *monmode are not visible in this listing - presumably the mode
 * travels in res; confirm.
 */
2061 int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode)
2066 ret = ctdb_control(ctdb, destnode, 0,
2067 CTDB_CONTROL_GET_MONMODE, 0, tdb_null,
2068 NULL, NULL, &res, &timeout, NULL);
2070 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getmonmode failed\n"));
2081 set the monitoring mode of a remote node to active
/*
 * Send CTDB_CONTROL_ENABLE_MONITOR with an empty payload to destnode.
 * Neither reply data nor the remote status is requested (both pointers are
 * NULL), so only the ctdb_control() return value is available.
 */
2083 int ctdb_ctrl_enable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2088 ret = ctdb_control(ctdb, destnode, 0,
2089 CTDB_CONTROL_ENABLE_MONITOR, 0, tdb_null,
2090 NULL, NULL,NULL, &timeout, NULL);
2092 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enable_monitor failed\n"));
2102 set the monitoring mode of a remote node to disable
/*
 * Send CTDB_CONTROL_DISABLE_MONITOR with an empty payload to destnode.
 * Neither reply data nor the remote status is requested (both pointers are
 * NULL), so only the ctdb_control() return value is available.
 */
2104 int ctdb_ctrl_disable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2109 ret = ctdb_control(ctdb, destnode, 0,
2110 CTDB_CONTROL_DISABLE_MONITOR, 0, tdb_null,
2111 NULL, NULL, NULL, &timeout, NULL);
2113 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disable_monitor failed\n"));
2125 sent to a node to make it take over an ip address
/*
 * Ask destnode to take over the public address described by ip.
 *
 * For AF_INET addresses the request is repacked into a
 * struct ctdb_public_ipv4 and sent with CTDB_CONTROL_TAKEOVER_IPv4; for any
 * other family the struct ctdb_public_ip is sent unchanged with
 * CTDB_CONTROL_TAKEOVER_IP. An error is logged when the control fails or
 * the remote status (res) is non-zero.
 * NOTE(review): only the copy of the address into ipv4.sin is visible; the
 * other ipv4 fields may be set on a line missing from this listing.
 */
2127 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
2128 uint32_t destnode, struct ctdb_public_ip *ip)
2131 struct ctdb_public_ipv4 ipv4;
2135 if (ip->addr.sa.sa_family == AF_INET) {
2137 ipv4.sin = ip->addr.ip;
2139 data.dsize = sizeof(ipv4);
2140 data.dptr = (uint8_t *)&ipv4;
2142 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IPv4, 0, data, NULL,
2143 NULL, &res, &timeout, NULL);
2145 data.dsize = sizeof(*ip);
2146 data.dptr = (uint8_t *)ip;
2148 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0, data, NULL,
2149 NULL, &res, &timeout, NULL);
2152 if (ret != 0 || res != 0) {
2153 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for takeover_ip failed\n"));
2162 sent to a node to make it release an ip address
/*
 * Ask destnode to release the public address described by ip.
 *
 * For AF_INET addresses the request is repacked into a
 * struct ctdb_public_ipv4 and sent with CTDB_CONTROL_RELEASE_IPv4; for any
 * other family the struct ctdb_public_ip is sent unchanged with
 * CTDB_CONTROL_RELEASE_IP. An error is logged when the control fails or the
 * remote status (res) is non-zero.
 * NOTE(review): only the copy of the address into ipv4.sin is visible; the
 * other ipv4 fields may be set on a line missing from this listing.
 */
2164 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
2165 uint32_t destnode, struct ctdb_public_ip *ip)
2168 struct ctdb_public_ipv4 ipv4;
2172 if (ip->addr.sa.sa_family == AF_INET) {
2174 ipv4.sin = ip->addr.ip;
2176 data.dsize = sizeof(ipv4);
2177 data.dptr = (uint8_t *)&ipv4;
2179 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IPv4, 0, data, NULL,
2180 NULL, &res, &timeout, NULL);
2182 data.dsize = sizeof(*ip);
2183 data.dptr = (uint8_t *)ip;
2185 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0, data, NULL,
2186 NULL, &res, &timeout, NULL);
2189 if (ret != 0 || res != 0) {
2190 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for release_ip failed\n"));
/*
 * Read the tunable called name from destnode (CTDB_CONTROL_GET_TUNABLE).
 *
 * The request is a struct ctdb_control_get_tunable built on the ctdb
 * context: length is strlen(name)+1 and the name bytes, including the
 * terminating NUL, follow. The request buffer is freed right after the
 * control returns. The reply must be exactly one uint32_t, which is stored
 * in *value; the reply buffer is freed on both the bad-size and the
 * success path.
 */
2201 int ctdb_ctrl_get_tunable(struct ctdb_context *ctdb,
2202 struct timeval timeout,
2204 const char *name, uint32_t *value)
2206 struct ctdb_control_get_tunable *t;
2207 TDB_DATA data, outdata;
/* fixed part of the request plus the NUL-terminated name */
2211 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(name) + 1;
2212 data.dptr = talloc_size(ctdb, data.dsize);
2213 CTDB_NO_MEMORY(ctdb, data.dptr);
2215 t = (struct ctdb_control_get_tunable *)data.dptr;
2216 t->length = strlen(name)+1;
2217 memcpy(t->name, name, t->length);
2219 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TUNABLE, 0, data, ctdb,
2220 &outdata, &res, &timeout, NULL);
2221 talloc_free(data.dptr);
2222 if (ret != 0 || res != 0) {
2223 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_tunable failed\n"));
2227 if (outdata.dsize != sizeof(uint32_t)) {
2228 DEBUG(DEBUG_ERR,("Invalid return data in get_tunable\n"));
2229 talloc_free(outdata.dptr);
2233 *value = *(uint32_t *)outdata.dptr;
2234 talloc_free(outdata.dptr);
/*
 * Set the tunable called name on destnode (CTDB_CONTROL_SET_TUNABLE).
 *
 * The request is a struct ctdb_control_set_tunable built on the ctdb
 * context: length is strlen(name)+1 and the name bytes, including the
 * terminating NUL, follow. The request buffer is freed right after the
 * control returns. An error is logged when the control fails or the remote
 * status (res) is non-zero.
 * NOTE(review): the store of value into the request is not visible in this
 * listing.
 */
2242 int ctdb_ctrl_set_tunable(struct ctdb_context *ctdb,
2243 struct timeval timeout,
2245 const char *name, uint32_t value)
2247 struct ctdb_control_set_tunable *t;
/* fixed part of the request plus the NUL-terminated name */
2252 data.dsize = offsetof(struct ctdb_control_set_tunable, name) + strlen(name) + 1;
2253 data.dptr = talloc_size(ctdb, data.dsize);
2254 CTDB_NO_MEMORY(ctdb, data.dptr);
2256 t = (struct ctdb_control_set_tunable *)data.dptr;
2257 t->length = strlen(name)+1;
2258 memcpy(t->name, name, t->length);
2261 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_TUNABLE, 0, data, NULL,
2262 NULL, &res, &timeout, NULL);
2263 talloc_free(data.dptr);
2264 if (ret != 0 || res != 0) {
2265 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_tunable failed\n"));
/*
 * Fetch the names of all tunables known to destnode
 * (CTDB_CONTROL_LIST_TUNABLES).
 *
 * The reply is a struct ctdb_control_list_tunable whose data member holds
 * t->length bytes of ':'-separated names. After validating the lengths the
 * text is copied to a scratch string on mem_ctx, the reply is freed, and
 * the string is split on ':' with strtok_r(). Each token is duplicated into
 * the *list array, which is grown by one element per token; the strings are
 * children of the array.
 * NOTE(review): the initialisation of *list and *count, the increment of
 * *count and the release of the scratch string are not visible here.
 */
2275 int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
2276 struct timeval timeout,
2278 TALLOC_CTX *mem_ctx,
2279 const char ***list, uint32_t *count)
2284 struct ctdb_control_list_tunable *t;
2287 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_LIST_TUNABLES, 0, tdb_null,
2288 mem_ctx, &outdata, &res, &timeout, NULL);
2289 if (ret != 0 || res != 0) {
2290 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for list_tunables failed\n"));
2294 t = (struct ctdb_control_list_tunable *)outdata.dptr;
/* reply must hold the fixed part, and the advertised text length must fit
 * in what follows it */
2295 if (outdata.dsize < offsetof(struct ctdb_control_list_tunable, data) ||
2296 t->length > outdata.dsize-offsetof(struct ctdb_control_list_tunable, data)) {
2297 DEBUG(DEBUG_ERR,("Invalid data in list_tunables reply\n"));
2298 talloc_free(outdata.dptr);
2302 p = talloc_strndup(mem_ctx, (char *)t->data, t->length);
/* NOTE(review): if this check fails the reply buffer is still allocated */
2303 CTDB_NO_MEMORY(ctdb, p);
2305 talloc_free(outdata.dptr);
/* strtok_r modifies p in place, which is why a private copy was made */
2310 for (s=strtok_r(p, ":", &ptr); s; s=strtok_r(NULL, ":", &ptr)) {
2311 (*list) = talloc_realloc(mem_ctx, *list, const char *, 1+(*count));
/* NOTE(review): on failure *list has already been overwritten with NULL,
 * so the old array and p stay allocated on mem_ctx */
2312 CTDB_NO_MEMORY(ctdb, *list);
2313 (*list)[*count] = talloc_strdup(*list, s);
2314 CTDB_NO_MEMORY(ctdb, (*list)[*count]);
/*
 * Fetch the public address list of destnode (CTDB_CONTROL_GET_PUBLIC_IPS),
 * passing flags through as the control flags.
 *
 * When the control itself succeeds but the remote status is -1, the request
 * is retried through ctdb_ctrl_get_public_ipsv4(). Otherwise the reply is
 * duplicated onto mem_ctx into *ips and the raw reply buffer is freed.
 * NOTE(review): the result of talloc_memdup() is not checked in the visible
 * lines, and the reply size is not validated against its contents here.
 */
2324 int ctdb_ctrl_get_public_ips_flags(struct ctdb_context *ctdb,
2325 struct timeval timeout, uint32_t destnode,
2326 TALLOC_CTX *mem_ctx,
2328 struct ctdb_all_public_ips **ips)
2334 ret = ctdb_control(ctdb, destnode, 0,
2335 CTDB_CONTROL_GET_PUBLIC_IPS, flags, tdb_null,
2336 mem_ctx, &outdata, &res, &timeout, NULL);
2337 if (ret == 0 && res == -1) {
2338 DEBUG(DEBUG_ERR,(__location__ " ctdb_control to get public ips failed, falling back to ipv4-only version\n"));
2339 return ctdb_ctrl_get_public_ipsv4(ctdb, timeout, destnode, mem_ctx, ips);
2341 if (ret != 0 || res != 0) {
2342 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed ret:%d res:%d\n", ret, res));
2346 *ips = (struct ctdb_all_public_ips *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
2347 talloc_free(outdata.dptr);
/*
 * Convenience wrapper around ctdb_ctrl_get_public_ips_flags().
 * NOTE(review): the remaining arguments of the call, including the flags
 * value, are not visible in this listing.
 */
2352 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
2353 struct timeval timeout, uint32_t destnode,
2354 TALLOC_CTX *mem_ctx,
2355 struct ctdb_all_public_ips **ips)
2357 return ctdb_ctrl_get_public_ips_flags(ctdb, timeout,
2362 int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb,
2363 struct timeval timeout, uint32_t destnode,
2364 TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
2369 struct ctdb_all_public_ipsv4 *ipsv4;
2371 ret = ctdb_control(ctdb, destnode, 0,
2372 CTDB_CONTROL_GET_PUBLIC_IPSv4, 0, tdb_null,
2373 mem_ctx, &outdata, &res, &timeout, NULL);
2374 if (ret != 0 || res != 0) {
2375 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed\n"));
2379 ipsv4 = (struct ctdb_all_public_ipsv4 *)outdata.dptr;
2380 len = offsetof(struct ctdb_all_public_ips, ips) +
2381 ipsv4->num*sizeof(struct ctdb_public_ip);
2382 *ips = talloc_zero_size(mem_ctx, len);
2383 CTDB_NO_MEMORY(ctdb, *ips);
2384 (*ips)->num = ipsv4->num;
2385 for (i=0; i<ipsv4->num; i++) {
2386 (*ips)->ips[i].pnn = ipsv4->ips[i].pnn;
2387 (*ips)->ips[i].addr.ip = ipsv4->ips[i].sin;
2390 talloc_free(outdata.dptr);
/*
 * Fetch interface information for the public address addr from destnode
 * (CTDB_CONTROL_GET_PUBLIC_IP_INFO).
 *
 * The reply is validated in two steps: it must contain the fixed part of
 * struct ctdb_control_public_ip_info, and then the fixed part plus
 * info->num interface entries. Each interface name is forcibly terminated
 * before the reply is duplicated onto mem_ctx into *_info; the raw reply
 * buffer is then freed.
 */
2395 int ctdb_ctrl_get_public_ip_info(struct ctdb_context *ctdb,
2396 struct timeval timeout, uint32_t destnode,
2397 TALLOC_CTX *mem_ctx,
2398 const ctdb_sock_addr *addr,
2399 struct ctdb_control_public_ip_info **_info)
2405 struct ctdb_control_public_ip_info *info;
2409 indata.dptr = discard_const_p(uint8_t, addr);
2410 indata.dsize = sizeof(*addr);
2412 ret = ctdb_control(ctdb, destnode, 0,
2413 CTDB_CONTROL_GET_PUBLIC_IP_INFO, 0, indata,
2414 mem_ctx, &outdata, &res, &timeout, NULL);
2415 if (ret != 0 || res != 0) {
2416 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2417 "failed ret:%d res:%d\n",
/* step 1: the fixed part of the structure must be present */
2422 len = offsetof(struct ctdb_control_public_ip_info, ifaces);
2423 if (len > outdata.dsize) {
2424 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2425 "returned invalid data with size %u > %u\n",
2426 (unsigned int)outdata.dsize,
2427 (unsigned int)len));
2428 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
/* step 2: info->num can now be read; all advertised entries must fit.
 * NOTE(review): the multiplication is not checked for overflow */
2432 info = (struct ctdb_control_public_ip_info *)outdata.dptr;
2433 len += info->num*sizeof(struct ctdb_control_iface_info);
2435 if (len > outdata.dsize) {
2436 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2437 "returned invalid data with size %u > %u\n",
2438 (unsigned int)outdata.dsize,
2439 (unsigned int)len));
2440 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2444 /* make sure we null terminate the returned strings */
/* NOTE(review): writes index CTDB_IFACE_SIZE, so the name array presumably
 * has more than CTDB_IFACE_SIZE elements - confirm against the struct */
2445 for (i=0; i < info->num; i++) {
2446 info->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2449 *_info = (struct ctdb_control_public_ip_info *)talloc_memdup(mem_ctx,
2452 talloc_free(outdata.dptr);
2453 if (*_info == NULL) {
2454 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get public ip info "
2455 "talloc_memdup size %u failed\n",
2456 (unsigned int)outdata.dsize));
/*
 * Fetch the interface list of destnode (CTDB_CONTROL_GET_IFACES).
 *
 * The reply is validated in two steps: it must contain the fixed part of
 * struct ctdb_control_get_ifaces, and then the fixed part plus ifaces->num
 * interface entries. Each interface name is forcibly terminated before the
 * reply is duplicated onto mem_ctx into *_ifaces; the raw reply buffer is
 * then freed.
 */
2463 int ctdb_ctrl_get_ifaces(struct ctdb_context *ctdb,
2464 struct timeval timeout, uint32_t destnode,
2465 TALLOC_CTX *mem_ctx,
2466 struct ctdb_control_get_ifaces **_ifaces)
2471 struct ctdb_control_get_ifaces *ifaces;
2475 ret = ctdb_control(ctdb, destnode, 0,
2476 CTDB_CONTROL_GET_IFACES, 0, tdb_null,
2477 mem_ctx, &outdata, &res, &timeout, NULL);
2478 if (ret != 0 || res != 0) {
2479 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2480 "failed ret:%d res:%d\n",
/* step 1: the fixed part of the structure must be present */
2485 len = offsetof(struct ctdb_control_get_ifaces, ifaces);
2486 if (len > outdata.dsize) {
2487 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2488 "returned invalid data with size %u > %u\n",
2489 (unsigned int)outdata.dsize,
2490 (unsigned int)len));
2491 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
/* step 2: ifaces->num can now be read; all advertised entries must fit.
 * NOTE(review): the multiplication is not checked for overflow */
2495 ifaces = (struct ctdb_control_get_ifaces *)outdata.dptr;
2496 len += ifaces->num*sizeof(struct ctdb_control_iface_info);
2498 if (len > outdata.dsize) {
2499 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2500 "returned invalid data with size %u > %u\n",
2501 (unsigned int)outdata.dsize,
2502 (unsigned int)len));
2503 dump_data(DEBUG_DEBUG, outdata.dptr, outdata.dsize);
2507 /* make sure we null terminate the returned strings */
/* NOTE(review): writes index CTDB_IFACE_SIZE, so the name array presumably
 * has more than CTDB_IFACE_SIZE elements - confirm against the struct */
2508 for (i=0; i < ifaces->num; i++) {
2509 ifaces->ifaces[i].name[CTDB_IFACE_SIZE] = '\0';
2512 *_ifaces = (struct ctdb_control_get_ifaces *)talloc_memdup(mem_ctx,
2515 talloc_free(outdata.dptr);
2516 if (*_ifaces == NULL) {
2517 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get ifaces "
2518 "talloc_memdup size %u failed\n",
2519 (unsigned int)outdata.dsize));
/*
 * Send the interface description info to destnode with
 * CTDB_CONTROL_SET_IFACE_LINK_STATE. The structure is sent as-is
 * (sizeof(*info) bytes); no reply data is requested. An error is logged
 * when the control fails or the remote status (res) is non-zero.
 */
2526 int ctdb_ctrl_set_iface_link(struct ctdb_context *ctdb,
2527 struct timeval timeout, uint32_t destnode,
2528 TALLOC_CTX *mem_ctx,
2529 const struct ctdb_control_iface_info *info)
2535 indata.dptr = discard_const_p(uint8_t, info);
2536 indata.dsize = sizeof(*info);
2538 ret = ctdb_control(ctdb, destnode, 0,
2539 CTDB_CONTROL_SET_IFACE_LINK_STATE, 0, indata,
2540 mem_ctx, NULL, &res, &timeout, NULL);
2541 if (ret != 0 || res != 0) {
2542 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set iface link "
2543 "failed ret:%d res:%d\n",
2552 set/clear the permanent disabled bit on a remote node
/*
 * Change the flags of node destnode cluster-wide.
 *
 * Visible steps: look up the recovery master via the local node, read the
 * node map from the recovery master, take destnode's current flags as
 * old_flags, derive new_flags from them, and broadcast a
 * struct ctdb_node_flag_change with CTDB_CONTROL_MODIFY_FLAGS to the nodes
 * returned by list_of_connected_nodes(). All temporary allocations hang off
 * tmp_ctx, which is freed on every visible exit path.
 * NOTE(review): the application of the set mask and the pnn field of the
 * change record are not visible in this listing.
 */
2554 int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
2555 uint32_t set, uint32_t clear)
2559 struct ctdb_node_map *nodemap=NULL;
2560 struct ctdb_node_flag_change c;
2561 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2566 /* find the recovery master */
2567 ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
2569 DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
2570 talloc_free(tmp_ctx);
2575 /* read the node flags from the recmaster */
2576 ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
/* NOTE(review): the message prints destnode although the map was requested
 * from recmaster */
2578 DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
2579 talloc_free(tmp_ctx);
/* destnode is used as an index into the node map below */
2582 if (destnode >= nodemap->num) {
2583 DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
2584 talloc_free(tmp_ctx);
2589 c.old_flags = nodemap->nodes[destnode].flags;
2590 c.new_flags = c.old_flags;
2592 c.new_flags &= ~clear;
2594 data.dsize = sizeof(c);
2595 data.dptr = (unsigned char *)&c;
2597 /* send the flags update to all connected nodes */
2598 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2600 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
2602 timeout, false, data,
2605 DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
2607 talloc_free(tmp_ctx);
2611 talloc_free(tmp_ctx);
2619 int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
2620 struct timeval timeout,
2622 struct ctdb_tunable *tunables)
2628 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_ALL_TUNABLES, 0, tdb_null, ctdb,
2629 &outdata, &res, &timeout, NULL);
2630 if (ret != 0 || res != 0) {
2631 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get all tunables failed\n"));
2635 if (outdata.dsize != sizeof(*tunables)) {
2636 DEBUG(DEBUG_ERR,(__location__ " bad data size %u in ctdb_ctrl_get_all_tunables should be %u\n",
2637 (unsigned)outdata.dsize, (unsigned)sizeof(*tunables)));
2641 *tunables = *(struct ctdb_tunable *)outdata.dptr;
2642 talloc_free(outdata.dptr);
2647 add a public address to a node
/*
 * Send pub to destnode with CTDB_CONTROL_ADD_PUBLIC_IP. The payload is the
 * fixed part of struct ctdb_control_ip_iface followed by pub->len bytes of
 * the iface member; the caller's buffer is sent in place. An error is
 * logged when the control fails or the remote status (res) is non-zero.
 */
2649 int ctdb_ctrl_add_public_ip(struct ctdb_context *ctdb,
2650 struct timeval timeout,
2652 struct ctdb_control_ip_iface *pub)
2658 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
2659 data.dptr = (unsigned char *)pub;
2661 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_ADD_PUBLIC_IP, 0, data, NULL,
2662 NULL, &res, &timeout, NULL);
2663 if (ret != 0 || res != 0) {
2664 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for add_public_ip failed\n"));
2672 delete a public address from a node
/*
 * Send pub to destnode with CTDB_CONTROL_DEL_PUBLIC_IP. The payload is the
 * fixed part of struct ctdb_control_ip_iface followed by pub->len bytes of
 * the iface member; the caller's buffer is sent in place. An error is
 * logged when the control fails or the remote status (res) is non-zero.
 */
2674 int ctdb_ctrl_del_public_ip(struct ctdb_context *ctdb,
2675 struct timeval timeout,
2677 struct ctdb_control_ip_iface *pub)
2683 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
2684 data.dptr = (unsigned char *)pub;
2686 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_DEL_PUBLIC_IP, 0, data, NULL,
2687 NULL, &res, &timeout, NULL);
2688 if (ret != 0 || res != 0) {
2689 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for del_public_ip failed\n"));
2697 kill a tcp connection
/*
 * Send the struct ctdb_control_killtcp pointed to by killtcp to destnode
 * with CTDB_CONTROL_KILL_TCP. The caller's structure is sent in place. An
 * error is logged when the control fails or the remote status (res) is
 * non-zero.
 */
2699 int ctdb_ctrl_killtcp(struct ctdb_context *ctdb,
2700 struct timeval timeout,
2702 struct ctdb_control_killtcp *killtcp)
2708 data.dsize = sizeof(struct ctdb_control_killtcp);
2709 data.dptr = (unsigned char *)killtcp;
2711 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_KILL_TCP, 0, data, NULL,
2712 NULL, &res, &timeout, NULL);
2713 if (ret != 0 || res != 0) {
2714 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for killtcp failed\n"));
2724 int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
2725 struct timeval timeout,
2727 ctdb_sock_addr *addr,
2733 struct ctdb_control_gratious_arp *gratious_arp;
2734 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2737 len = strlen(ifname)+1;
2738 gratious_arp = talloc_size(tmp_ctx,
2739 offsetof(struct ctdb_control_gratious_arp, iface) + len);
2740 CTDB_NO_MEMORY(ctdb, gratious_arp);
2742 gratious_arp->addr = *addr;
2743 gratious_arp->len = len;
2744 memcpy(&gratious_arp->iface[0], ifname, len);
2747 data.dsize = offsetof(struct ctdb_control_gratious_arp, iface) + len;
2748 data.dptr = (unsigned char *)gratious_arp;
2750 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SEND_GRATIOUS_ARP, 0, data, NULL,
2751 NULL, &res, &timeout, NULL);
2752 if (ret != 0 || res != 0) {
2753 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for gratious_arp failed\n"));
2754 talloc_free(tmp_ctx);
2758 talloc_free(tmp_ctx);
2763 get a list of all tcp tickles that a node knows about for a particular vnn
/*
 * Fetch the tcp tickle list destnode holds for addr
 * (CTDB_CONTROL_GET_TCP_TICKLE_LIST).
 *
 * The whole ctdb_sock_addr is sent as input. The reply buffer, allocated on
 * mem_ctx, is handed to the caller directly through *list without copying.
 * NOTE(review): the timeout parameter is not passed to ctdb_control() in
 * the visible call (that argument is NULL), and the reply size is not
 * validated here.
 */
2765 int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
2766 struct timeval timeout, uint32_t destnode,
2767 TALLOC_CTX *mem_ctx,
2768 ctdb_sock_addr *addr,
2769 struct ctdb_control_tcp_tickle_list **list)
2772 TDB_DATA data, outdata;
2775 data.dptr = (uint8_t*)addr;
2776 data.dsize = sizeof(ctdb_sock_addr);
2778 ret = ctdb_control(ctdb, destnode, 0,
2779 CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
2780 mem_ctx, &outdata, &status, NULL, NULL);
2781 if (ret != 0 || status != 0) {
2782 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get tcp tickles failed\n"));
2786 *list = (struct ctdb_control_tcp_tickle_list *)outdata.dptr;
2792 register a server id
/*
 * Register the server id *id with the local daemon (CTDB_CURRENT_NODE)
 * using CTDB_CONTROL_REGISTER_SERVER_ID. The caller's structure is sent in
 * place. An error is logged when the control fails or the remote status
 * (res) is non-zero.
 */
2794 int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
2795 struct timeval timeout,
2796 struct ctdb_server_id *id)
2802 data.dsize = sizeof(struct ctdb_server_id);
2803 data.dptr = (unsigned char *)id;
2805 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
2806 CTDB_CONTROL_REGISTER_SERVER_ID,
2808 NULL, &res, &timeout, NULL);
2809 if (ret != 0 || res != 0) {
2810 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for register server id failed\n"));
2818 unregister a server id
/*
 * Unregister the server id *id from the local daemon (CTDB_CURRENT_NODE)
 * using CTDB_CONTROL_UNREGISTER_SERVER_ID. The caller's structure is sent
 * in place. An error is logged when the control fails or the remote status
 * (res) is non-zero.
 */
2820 int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
2821 struct timeval timeout,
2822 struct ctdb_server_id *id)
2828 data.dsize = sizeof(struct ctdb_server_id);
2829 data.dptr = (unsigned char *)id;
2831 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
2832 CTDB_CONTROL_UNREGISTER_SERVER_ID,
2834 NULL, &res, &timeout, NULL);
2835 if (ret != 0 || res != 0) {
2836 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for unregister server id failed\n"));
2845 check if a server id exists
2847 if a server id does exist, return *status == 1, otherwise *status == 0
/*
 * Ask destnode whether the server id *id is registered
 * (CTDB_CONTROL_CHECK_SERVER_ID). The caller's structure is sent in place
 * and the remote answer is collected in res.
 * NOTE(review): the status output parameter, the condition guarding the
 * error message and the mapping from res to *status are not visible in
 * this listing.
 */
2849 int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
2850 struct timeval timeout,
2852 struct ctdb_server_id *id,
2859 data.dsize = sizeof(struct ctdb_server_id);
2860 data.dptr = (unsigned char *)id;
2862 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CHECK_SERVER_ID,
2864 NULL, &res, &timeout, NULL);
2866 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for check server id failed\n"));
2880 get the list of server ids that are registered on a node
/*
 * Fetch the list of server ids registered on destnode
 * (CTDB_CONTROL_GET_SERVER_ID_LIST).
 *
 * The reply buffer is allocated on mem_ctx and handed to the caller through
 * *svid_list; talloc_steal() (re)parents it to mem_ctx rather than copying.
 * NOTE(review): the reply size is not validated in the visible lines.
 */
2882 int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
2883 TALLOC_CTX *mem_ctx,
2884 struct timeval timeout, uint32_t destnode,
2885 struct ctdb_server_id_list **svid_list)
2891 ret = ctdb_control(ctdb, destnode, 0,
2892 CTDB_CONTROL_GET_SERVER_ID_LIST, 0, tdb_null,
2893 mem_ctx, &outdata, &res, &timeout, NULL);
2894 if (ret != 0 || res != 0) {
2895 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_server_id_list failed\n"));
2899 *svid_list = (struct ctdb_server_id_list *)talloc_steal(mem_ctx, outdata.dptr);
/* Allocates a zeroed struct ctdb_context as a talloc child of ev, creates its
   idr tree, starts lastid close to INT_MAX so that id wrap-around is
   exercised early, sets the socket name to CTDB_PATH and records the
   statistics start time.
   NOTE(review): the embedded numbering skips lines here, so the failure
   branches after talloc_zero and ctdb_set_socketname, any further field
   setup and the return statement are not visible - confirm against the
   full file. */
2905 initialise the ctdb daemon for client applications
2907 NOTE: In current code the daemon does not fork. This is for testing purposes only
2908 and to simplify the code.
2910 struct ctdb_context *ctdb_init(struct event_context *ev)
2913 struct ctdb_context *ctdb;
2915 ctdb = talloc_zero(ev, struct ctdb_context);
2917 DEBUG(DEBUG_ERR,(__location__ " talloc_zero failed.\n"));
2921 ctdb->idr = idr_init(ctdb);
2922 /* Wrap early to exercise code. */
2923 ctdb->lastid = INT_MAX-200;
2924 CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
2926 ret = ctdb_set_socketname(ctdb, CTDB_PATH);
2928 DEBUG(DEBUG_ERR,(__location__ " ctdb_set_socketname failed.\n"));
2933 ctdb->statistics.statistics_start_time = timeval_current();
2942 void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
2944 ctdb->flags |= flags;
2948 setup the local socket name
2950 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
2952 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
2953 CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);
2958 const char *ctdb_get_socketname(struct ctdb_context *ctdb)
2960 return ctdb->daemon.name;
/* Accessor taking only the ctdb context and returning a uint32_t.
   NOTE(review): the function body is not visible in this listing (the
   embedded numbering skips it) - confirm against the full file. */
2964 return the pnn of this node
2966 uint32_t ctdb_get_pnn(struct ctdb_context *ctdb)
2973 get the uptime of a remote node
2975 struct ctdb_client_control_state *
2976 ctdb_ctrl_uptime_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
2978 return ctdb_control_send(ctdb, destnode, 0,
2979 CTDB_CONTROL_UPTIME, 0, tdb_null,
2980 mem_ctx, &timeout, NULL);
2983 int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime)
2989 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
2990 if (ret != 0 || res != 0) {
2991 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_uptime_recv failed\n"));
2995 *uptime = (struct ctdb_uptime *)talloc_steal(mem_ctx, outdata.dptr);
3000 int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_uptime **uptime)
3002 struct ctdb_client_control_state *state;
3004 state = ctdb_ctrl_uptime_send(ctdb, mem_ctx, timeout, destnode);
3005 return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
3009 send a control to execute the "recovered" event script on a node
3011 int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3016 ret = ctdb_control(ctdb, destnode, 0,
3017 CTDB_CONTROL_END_RECOVERY, 0, tdb_null,
3018 NULL, NULL, &status, &timeout, NULL);
3019 if (ret != 0 || status != 0) {
3020 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for end_recovery failed\n"));
3028 callback for the async helpers used when sending the same control
3029 to multiple nodes in parallell.
3031 static void async_callback(struct ctdb_client_control_state *state)
3033 struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
3034 struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
3038 uint32_t destnode = state->c->hdr.destnode;
3040 /* one more node has responded with recmode data */
3043 /* if we failed to push the db, then return an error and let
3044 the main loop try again.
3046 if (state->state != CTDB_CONTROL_DONE) {
3047 if ( !data->dont_log_errors) {
3048 DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
3051 if (data->fail_callback) {
3052 data->fail_callback(ctdb, destnode, res, outdata,
3053 data->callback_data);
3058 state->async.fn = NULL;
3060 ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
3061 if ((ret != 0) || (res != 0)) {
3062 if ( !data->dont_log_errors) {
3063 DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
3066 if (data->fail_callback) {
3067 data->fail_callback(ctdb, destnode, res, outdata,
3068 data->callback_data);
3071 if ((ret == 0) && (data->callback != NULL)) {
3072 data->callback(ctdb, destnode, res, outdata,
3073 data->callback_data);
3078 void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
3080 /* set up the callback functions */
3081 state->async.fn = async_callback;
3082 state->async.private_data = data;
3084 /* one more control to wait for to complete */
3089 /* wait for up to the maximum number of seconds allowed
3090 or until all nodes we expect a response from has replied
3092 int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
3094 while (data->count > 0) {
3095 event_loop_once(ctdb->ev);
3097 if (data->fail_count != 0) {
3098 if (!data->dont_log_errors) {
3099 DEBUG(DEBUG_ERR,("Async wait failed - fail_count=%u\n",
/* Sends the control opcode to every entry of the nodes array (its length is
   taken from talloc_get_size(nodes), so nodes must be a talloc array of
   uint32_t), registers each request with a freshly allocated
   client_async_data via ctdb_client_async_add() and then waits for all of
   them with ctdb_client_async_wait().  The batch object is freed on every
   visible path; the requests are allocated with it as their context.
   NOTE(review): the embedded numbering skips lines here (three parameter
   lines, the declarations of j and num_nodes and the return statements are
   not visible) - confirm parameter order and return values against the
   full file. */
3109 perform a simple control on the listed nodes
3110 The control cannot return data
3112 int ctdb_client_async_control(struct ctdb_context *ctdb,
3113 enum ctdb_controls opcode,
3116 struct timeval timeout,
3117 bool dont_log_errors,
3119 client_async_callback client_callback,
3120 client_async_callback fail_callback,
3121 void *callback_data)
3123 struct client_async_data *async_data;
3124 struct ctdb_client_control_state *state;
3127 async_data = talloc_zero(ctdb, struct client_async_data);
3128 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
3129 async_data->dont_log_errors = dont_log_errors;
3130 async_data->callback = client_callback;
3131 async_data->fail_callback = fail_callback;
3132 async_data->callback_data = callback_data;
3133 async_data->opcode = opcode;
3135 num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
3137 /* loop over all nodes and send an async control to each of them */
3138 for (j=0; j<num_nodes; j++) {
3139 uint32_t pnn = nodes[j];
3141 state = ctdb_control_send(ctdb, pnn, srvid, opcode,
3142 0, data, async_data, &timeout, NULL);
3143 if (state == NULL) {
3144 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
3145 talloc_free(async_data);
3149 ctdb_client_async_add(async_data, state);
3152 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
3153 talloc_free(async_data);
3157 talloc_free(async_data);
/* Builds a talloc array (on mem_ctx) of the node numbers found in
   vnn_map->map.  Two passes over the map: the first sizes the array, the
   second fills it; both test for the local pnn together with include_self.
   NOTE(review): the embedded numbering skips lines here (the last
   parameter, the bodies of the if statements, the counting statement and
   the return are not visible) - confirm against the full file.
   NOTE(review): i is declared int but is compared with vnn_map->size;
   check the signedness of that field. */
3161 uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
3162 struct ctdb_vnn_map *vnn_map,
3163 TALLOC_CTX *mem_ctx,
3166 int i, j, num_nodes;
3169 for (i=num_nodes=0;i<vnn_map->size;i++) {
3170 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3176 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3177 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3179 for (i=j=0;i<vnn_map->size;i++) {
3180 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
3183 nodes[j++] = vnn_map->map[i];
/* Builds a talloc array (on mem_ctx) of pnns taken from node_map.  Two
   passes over the map: the first sizes the array, the second fills it;
   both test NODE_FLAGS_INACTIVE and the local pnn together with
   include_self.
   NOTE(review): the embedded numbering skips lines here (the last
   parameter, the bodies of the if statements, the counting statement and
   the return are not visible) - confirm against the full file. */
3189 uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
3190 struct ctdb_node_map *node_map,
3191 TALLOC_CTX *mem_ctx,
3194 int i, j, num_nodes;
3197 for (i=num_nodes=0;i<node_map->num;i++) {
3198 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3201 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3207 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3208 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3210 for (i=j=0;i<node_map->num;i++) {
3211 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3214 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3217 nodes[j++] = node_map->nodes[i].pnn;
/* Builds a talloc array (on mem_ctx) of pnns taken from node_map.  Two
   passes over the map: the first sizes the array, the second fills it;
   both test NODE_FLAGS_INACTIVE and compare each entry with the given pnn.
   NOTE(review): the embedded numbering skips lines here (the last
   parameter, the bodies of the if statements, the counting statement and
   the return are not visible) - confirm against the full file. */
3223 uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb,
3224 struct ctdb_node_map *node_map,
3225 TALLOC_CTX *mem_ctx,
3228 int i, j, num_nodes;
3231 for (i=num_nodes=0;i<node_map->num;i++) {
3232 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3235 if (node_map->nodes[i].pnn == pnn) {
3241 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3242 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3244 for (i=j=0;i<node_map->num;i++) {
3245 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3248 if (node_map->nodes[i].pnn == pnn) {
3251 nodes[j++] = node_map->nodes[i].pnn;
/* Builds a talloc array (on mem_ctx) of pnns taken from node_map.  Two
   passes over the map: the first sizes the array, the second fills it;
   both test NODE_FLAGS_DISCONNECTED and the local pnn together with
   include_self.
   NOTE(review): the embedded numbering skips lines here (the last
   parameter, the bodies of the if statements, the counting statement and
   the return are not visible) - confirm against the full file. */
3257 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
3258 struct ctdb_node_map *node_map,
3259 TALLOC_CTX *mem_ctx,
3262 int i, j, num_nodes;
3265 for (i=num_nodes=0;i<node_map->num;i++) {
3266 if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
3269 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3275 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3276 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3278 for (i=j=0;i<node_map->num;i++) {
3279 if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
3282 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3285 nodes[j++] = node_map->nodes[i].pnn;
/* Probes fd with fcntl(F_GETLK) using a write-lock request relative to the
   start of the file, checks whether the probe came back as F_UNLCK and
   then reads one byte at file offset pnn with pread().
   NOTE(review): the embedded numbering skips lines here (the return type,
   the declarations of lock and c, the lock range setup and every return
   statement are not visible) - confirm against the full file. */
3292 this is used to test if a pnn lock exists and if it exists will return
3293 the number of connections that pnn has reported or -1 if that recovery
3294 daemon is not running.
3297 ctdb_read_pnn_lock(int fd, int32_t pnn)
3302 lock.l_type = F_WRLCK;
3303 lock.l_whence = SEEK_SET;
3308 if (fcntl(fd, F_GETLK, &lock) != 0) {
3309 DEBUG(DEBUG_ERR, (__location__ " F_GETLK failed with %s\n", strerror(errno)));
3313 if (lock.l_type == F_UNLCK) {
3317 if (pread(fd, &c, 1, pnn) == -1) {
3318 DEBUG(DEBUG_CRIT,(__location__ " failed read pnn count - %s\n", strerror(errno)));
3326 get capabilities of a remote node
3328 struct ctdb_client_control_state *
3329 ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3331 return ctdb_control_send(ctdb, destnode, 0,
3332 CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
3333 mem_ctx, &timeout, NULL);
3336 int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
3342 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3343 if ( (ret != 0) || (res != 0) ) {
3344 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
3349 *capabilities = *((uint32_t *)outdata.dptr);
3355 int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
3357 struct ctdb_client_control_state *state;
3358 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
3361 state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode);
3362 ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities);
3363 talloc_free(tmp_ctx);
/* Sends CTDB_CONTROL_TRANS2_ACTIVE to destnode with db_id as the payload
   and logs an error on failure.
   NOTE(review): the embedded numbering skips lines here (the remaining
   parameters, the local declarations, the tail of the ctdb_control call,
   the failure test and the return statements are not visible) - confirm
   against the full file. */
3368 * check whether a transaction is active on a given db on a given node
3370 int32_t ctdb_ctrl_transaction_active(struct ctdb_context *ctdb,
3378 indata.dptr = (uint8_t *)&db_id;
3379 indata.dsize = sizeof(db_id);
3381 ret = ctdb_control(ctdb, destnode, 0,
3382 CTDB_CONTROL_TRANS2_ACTIVE,
3383 0, indata, NULL, NULL, &status,
3387 DEBUG(DEBUG_ERR, (__location__ " ctdb control for transaction_active failed\n"));
/* Per-transaction client state: the database the transaction runs on plus
   two marshall buffers that log the operations performed under it.
   NOTE(review): the embedded numbering skips a line between ctdb_db and
   the buffers; code elsewhere in this file reads h->in_replay, so a member
   of that name is presumably declared there - confirm against the full
   file. */
3395 struct ctdb_transaction_handle {
3396 struct ctdb_db_context *ctdb_db;
3399 * we store the reads and writes done under a transaction:
3400 * - one list stores both reads and writes (m_all),
3401 * - the other just writes (m_write)
3403 struct ctdb_marshall_buffer *m_all;
3404 struct ctdb_marshall_buffer *m_write;
3407 /* start a transaction on a database */
3408 static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
3410 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
/* Visible sequence: refuse non-persistent databases, fetch-lock the
   CTDB_TRANSACTION_LOCK_KEY record, ask the local daemon whether a
   transaction is already active on this db, store our pid in the lock
   record with ourselves as dmaster, start the local tdb transaction, then
   re-fetch the lock record inside the transaction and verify that dmaster
   and the stored pid are still ours.  Every visible failure path frees
   tmp_ctx, and those after tdb_transaction_start also cancel the tdb
   transaction.
   NOTE(review): the embedded numbering skips lines here (local
   declarations, the conditions guarding several DEBUG calls, the retry /
   wait code after the "transaction is active" message and all return
   statements are not visible) - confirm against the full file.
   NOTE(review): the result of talloc_new(h) is used without a visible
   NULL check. */
3414 /* acquire the transaction lock record and open the local tdb transaction */
3415 static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
3417 struct ctdb_record_handle *rh;
3420 struct ctdb_ltdb_header header;
3421 TALLOC_CTX *tmp_ctx;
3422 const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
3424 struct ctdb_db_context *ctdb_db = h->ctdb_db;
3428 key.dptr = discard_const(keyname);
3429 key.dsize = strlen(keyname);
3431 if (!ctdb_db->persistent) {
3432 DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n"));
3437 tmp_ctx = talloc_new(h);
3439 rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
3441 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));
3442 talloc_free(tmp_ctx);
3446 status = ctdb_ctrl_transaction_active(ctdb_db->ctdb,
/* the wait time computed below is always smaller than 100000 */
3450 unsigned long int usec = (1000 + random()) % 100000;
3451 DEBUG(DEBUG_DEBUG, (__location__ " transaction is active "
3452 "on db_id[0x%08x]. waiting for %lu "
3454 ctdb_db->db_id, usec));
3455 talloc_free(tmp_ctx);
3461 * store the pid in the database:
3462 * it is not enough that the node is dmaster...
3465 data.dptr = (unsigned char *)&pid;
3466 data.dsize = sizeof(pid_t);
3468 rh->header.dmaster = ctdb_db->ctdb->pnn;
3469 ret = ctdb_ltdb_store(ctdb_db, key, &(rh->header), data);
3471 DEBUG(DEBUG_ERR, (__location__ " Failed to store pid in "
3472 "transaction record\n"));
3473 talloc_free(tmp_ctx);
3479 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
3481 DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n"));
3482 talloc_free(tmp_ctx);
/* from here on the tdb transaction is open: failures must cancel it */
3486 ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, &data);
3488 DEBUG(DEBUG_ERR,(__location__ " Failed to re-fetch transaction "
3489 "lock record inside transaction\n"));
3490 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3491 talloc_free(tmp_ctx);
3495 if (header.dmaster != ctdb_db->ctdb->pnn) {
3496 DEBUG(DEBUG_DEBUG,(__location__ " not dmaster any more on "
3497 "transaction lock record\n"));
3498 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3499 talloc_free(tmp_ctx);
3503 if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) {
3504 DEBUG(DEBUG_DEBUG, (__location__ " my pid is not stored in "
3505 "the transaction lock record\n"));
3506 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3507 talloc_free(tmp_ctx);
3511 talloc_free(tmp_ctx);
/* Public entry point: allocates a zeroed transaction handle on mem_ctx,
   binds it to ctdb_db, runs ctdb_transaction_fetch_start() on it and
   installs ctdb_transaction_destructor as the handle's talloc destructor.
   NOTE(review): the embedded numbering skips lines here (the NULL test
   after talloc_zero, the handling of a failed fetch_start and the return
   statements are not visible) - confirm against the full file. */
3517 /* start a transaction on a database */
3518 struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
3519 TALLOC_CTX *mem_ctx)
3521 struct ctdb_transaction_handle *h;
3524 h = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
3526 DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n"));
3530 h->ctdb_db = ctdb_db;
3532 ret = ctdb_transaction_fetch_start(h);
3538 talloc_set_destructor(h, ctdb_transaction_destructor);
/* Reads key from the local tdb through ctdb_ltdb_fetch(), with the value
   allocated on mem_ctx.  A fetch result of -1 together with a dmaster of
   (uint32_t)-1 is treated as "record does not exist yet".  Unless the
   handle is replaying, the read is appended to h->m_all (with 1 as the
   fourth ctdb_marshall_add argument).
   NOTE(review): the embedded numbering skips lines here (what is stored in
   *data for a missing record, the generic error path and the return
   statements are not visible) - confirm against the full file. */
3546 fetch a record inside a transaction
3548 int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
3549 TALLOC_CTX *mem_ctx,
3550 TDB_DATA key, TDB_DATA *data)
3552 struct ctdb_ltdb_header header;
3555 ZERO_STRUCT(header);
3557 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data);
3558 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3559 /* record doesn't exist yet */
3568 if (!h->in_replay) {
3569 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
3570 if (h->m_all == NULL) {
3571 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
/* Writes key/data inside the open transaction.  The current record is
   fetched first to obtain its header; a missing record gets a zeroed
   header.  Writing data identical to what is already stored is skipped.
   Otherwise this node becomes dmaster in the header, the write is appended
   to h->m_all (unless replaying, with 0 as the fourth ctdb_marshall_add
   argument) and to h->m_write (together with the header), and finally
   stored in the local tdb with ctdb_ltdb_store().  tmp_ctx is released on
   every visible path.
   NOTE(review): the embedded numbering skips lines here (the declarations
   of ret and olddata, a statement next to the dmaster assignment and all
   return statements are not visible) - confirm against the full file.
   NOTE(review): the memcmp below reads olddata even on the "record does
   not exist" path; whether ctdb_ltdb_fetch() leaves olddata well defined
   there cannot be seen from here - verify. */
3580 stores a record inside a transaction
3582 int ctdb_transaction_store(struct ctdb_transaction_handle *h,
3583 TDB_DATA key, TDB_DATA data)
3585 TALLOC_CTX *tmp_ctx = talloc_new(h);
3586 struct ctdb_ltdb_header header;
3590 ZERO_STRUCT(header);
3592 /* we need the header so we can update the RSN */
3593 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
3594 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3595 /* the record doesn't exist - create one with us as dmaster.
3596 This is only safe because we are in a transaction and this
3597 is a persistent database */
3598 ZERO_STRUCT(header);
3599 } else if (ret != 0) {
3600 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
3601 talloc_free(tmp_ctx);
3605 if (data.dsize == olddata.dsize &&
3606 memcmp(data.dptr, olddata.dptr, data.dsize) == 0) {
3607 /* save writing the same data */
3608 talloc_free(tmp_ctx);
3612 header.dmaster = h->ctdb_db->ctdb->pnn;
3615 if (!h->in_replay) {
3616 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
3617 if (h->m_all == NULL) {
3618 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3619 talloc_free(tmp_ctx);
3624 h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
3625 if (h->m_write == NULL) {
3626 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3627 talloc_free(tmp_ctx);
3631 ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data);
3633 talloc_free(tmp_ctx);
/* Re-runs the operations logged in h->m_all against a freshly started
   transaction.  The handle is switched to replay mode, the old write
   buffer is freed and ctdb_transaction_fetch_start() is called again.
   Each logged record is then walked with ctdb_marshall_loop_next():
   records with reqid 0 are written again via ctdb_transaction_store(),
   the others are read again via ctdb_transaction_fetch() and compared
   with the logged value - a difference means the replay has to give up.
   NOTE(review): the embedded numbering skips lines here (local
   declarations, what happens to h->m_write after the free, the failure
   label / returns and the condition under which the final
   tdb_transaction_cancel runs are not visible) - confirm against the full
   file. */
3639 replay a transaction
3641 static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
3644 struct ctdb_rec_data *rec = NULL;
3646 h->in_replay = true;
3647 talloc_free(h->m_write);
3650 ret = ctdb_transaction_fetch_start(h);
3655 for (i=0;i<h->m_all->count;i++) {
3658 rec = ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data);
3660 DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n"));
3664 if (rec->reqid == 0) {
3666 if (ctdb_transaction_store(h, key, data) != 0) {
3671 TALLOC_CTX *tmp_ctx = talloc_new(h);
3673 if (ctdb_transaction_fetch(h, tmp_ctx, key, &data2) != 0) {
3674 talloc_free(tmp_ctx);
3677 if (data2.dsize != data.dsize ||
3678 memcmp(data2.dptr, data.dptr, data.dsize) != 0) {
3679 /* the record has changed on us - we have to give up */
3680 talloc_free(tmp_ctx);
3683 talloc_free(tmp_ctx);
3690 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
/* Commits the transaction held by h.  The handle's destructor is removed
   first, so cancelling the tdb transaction is this function's job from
   then on.  With no pending writes the tdb transaction is simply
   cancelled.  Otherwise the marshalled writes are sent to the local daemon
   (TRANS2_COMMIT on the first attempt, TRANS2_COMMIT_RETRY afterwards)
   with a one second timeout; on failure the tdb transaction is cancelled,
   a failure control is chosen from the returned status, and the
   transaction is replayed with ctdb_replay_transaction(), giving up when
   the retry counter reaches 100.  After a successful remote commit the
   local tdb transaction is committed and TRANS2_FINISHED is sent; on the
   error paths the chosen failure control is sent instead.  Those
   notifications use CTDB_CTRL_FLAG_NOREPLY.
   NOTE(review): the embedded numbering skips lines here (local
   declarations, the retry label and jump, any wait between attempts, the
   branch structure around the status switch, the freeing of h and all
   return statements are not visible) - confirm against the full file.
   NOTE(review): the retry log message is built from "commit%s failed" and
   "retry ", which prints as "commitretry  failed"; a leading space in the
   argument looks intended. */
3696 commit a transaction
3698 int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
3702 struct ctdb_context *ctdb = h->ctdb_db->ctdb;
3703 struct timeval timeout;
3704 enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
3706 talloc_set_destructor(h, NULL);
3708 /* our commit strategy is quite complex.
3710 - we first try to commit the changes to all other nodes
3712 - if that works, then we commit locally and we are done
3714 - if a commit on another node fails, then we need to cancel
3715 the transaction, then restart the transaction (thus
3716 opening a window of time for a pending recovery to
3717 complete), then replay the transaction, checking all the
3718 reads and writes (checking that reads give the same data,
3719 and writes succeed). Then we retry the transaction to the
3724 if (h->m_write == NULL) {
3725 /* no changes were made */
3726 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3731 /* tell ctdbd to commit to the other nodes */
3732 timeout = timeval_current_ofs(1, 0);
3733 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3734 retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0,
3735 ctdb_marshall_finish(h->m_write), NULL, NULL, &status,
3737 if (ret != 0 || status != 0) {
3738 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3739 DEBUG(DEBUG_NOTICE, (__location__ " transaction commit%s failed"
3740 ", retrying after 1 second...\n",
3741 (retries==0)?"":"retry "));
3745 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3747 /* work out what error code we will give if we
3748 have to fail the operation */
3749 switch ((enum ctdb_trans2_commit_error)status) {
3750 case CTDB_TRANS2_COMMIT_SUCCESS:
3751 case CTDB_TRANS2_COMMIT_SOMEFAIL:
3752 case CTDB_TRANS2_COMMIT_TIMEOUT:
3753 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3755 case CTDB_TRANS2_COMMIT_ALLFAIL:
3756 failure_control = CTDB_CONTROL_TRANS2_FINISHED;
3761 if (++retries == 100) {
3762 DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n",
3763 h->ctdb_db->db_id, retries, (unsigned)failure_control));
3764 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3765 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3766 tdb_null, NULL, NULL, NULL, NULL, NULL);
3771 if (ctdb_replay_transaction(h) != 0) {
3772 DEBUG(DEBUG_ERR, (__location__ " Failed to replay "
3773 "transaction on db 0x%08x, "
3774 "failure control =%u\n",
3776 (unsigned)failure_control));
3777 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3778 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3779 tdb_null, NULL, NULL, NULL, NULL, NULL);
3785 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3788 /* do the real commit locally */
3789 ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
3791 DEBUG(DEBUG_ERR, (__location__ " Failed to commit transaction "
3792 "on db id 0x%08x locally, "
3793 "failure_control=%u\n",
3795 (unsigned)failure_control));
3796 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3797 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3798 tdb_null, NULL, NULL, NULL, NULL, NULL);
3803 /* tell ctdbd that we are finished with our local commit */
3804 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3805 CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY,
3806 tdb_null, NULL, NULL, NULL, NULL, NULL);
3812 recovery daemon ping to main daemon
3814 int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
3819 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
3820 ctdb, NULL, &res, NULL, NULL);
3821 if (ret != 0 || res != 0) {
3822 DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
/* Visible sequence: build the debug_extra log prefix from the printf-style
   arguments with ":" appended, shut down the transport if one is
   configured, replace the event context with a new one that allows nested
   loops, close the old daemon socket and reconnect with
   ctdb_socket_connect().
   NOTE(review): the embedded numbering skips lines here (the va_list
   handling, the failure test after ctdb_socket_connect and the return
   statements are not visible) - confirm against the full file.
   NOTE(review): neither talloc_vasprintf() nor event_context_init() is
   visibly checked for NULL. */
3829 /* when forking the main daemon and the child process needs to connect back
3830 * to the daemon as a client process, this function can be used to change
3831 * the ctdb context from daemon into client mode
3833 int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...)
3838 /* Add extra information so we can identify this in the logs */
3840 debug_extra = talloc_append_string(NULL, talloc_vasprintf(NULL, fmt, ap), ":");
3843 /* shutdown the transport */
3844 if (ctdb->methods) {
3845 ctdb->methods->shutdown(ctdb);
3848 /* get a new event context */
3849 talloc_free(ctdb->ev);
3850 ctdb->ev = event_context_init(ctdb);
3851 tevent_loop_allow_nesting(ctdb->ev);
3853 close(ctdb->daemon.sd);
3854 ctdb->daemon.sd = -1;
3856 /* initialise ctdb */
3857 ret = ctdb_socket_connect(ctdb);
3859 DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
3867 get the status of running the monitor eventscripts: NULL means never run.
3869 int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
3870 struct timeval timeout, uint32_t destnode,
3871 TALLOC_CTX *mem_ctx, enum ctdb_eventscript_call type,
3872 struct ctdb_scripts_wire **script_status)
3875 TDB_DATA outdata, indata;
3877 uint32_t uinttype = type;
3879 indata.dptr = (uint8_t *)&uinttype;
3880 indata.dsize = sizeof(uinttype);
3882 ret = ctdb_control(ctdb, destnode, 0,
3883 CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, indata,
3884 mem_ctx, &outdata, &res, &timeout, NULL);
3885 if (ret != 0 || res != 0) {
3886 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
3890 if (outdata.dsize == 0) {
3891 *script_status = NULL;
3893 *script_status = (struct ctdb_scripts_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
3894 talloc_free(outdata.dptr);
3901 tell the main daemon how long it took to lock the reclock file
3903 int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
3909 data.dptr = (uint8_t *)&latency;
3910 data.dsize = sizeof(latency);
3912 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, data,
3913 ctdb, NULL, &res, NULL, NULL);
3914 if (ret != 0 || res != 0) {
3915 DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
/* Sends CTDB_CONTROL_GET_RECLOCK_FILE to destnode.  A non-empty reply is
   copied with talloc_strdup() onto mem_ctx and stored through name; the
   reply buffer is freed afterwards.  No error is logged on failure.
   NOTE(review): the embedded numbering skips lines here (the output
   parameter, the local declarations, what *name receives for an empty
   reply and the return statements are not visible) - confirm against the
   full file.
   NOTE(review): the talloc_strdup() result is not visibly checked, and the
   reply is assumed to be NUL terminated. */
3923 get the name of the reclock file
3925 int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout,
3926 uint32_t destnode, TALLOC_CTX *mem_ctx,
3933 ret = ctdb_control(ctdb, destnode, 0,
3934 CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
3935 mem_ctx, &data, &res, &timeout, NULL);
3936 if (ret != 0 || res != 0) {
3940 if (data.dsize == 0) {
3943 *name = talloc_strdup(mem_ctx, discard_const(data.dptr));
3945 talloc_free(data.dptr);
3951 set the reclock filename for a node
3953 int ctdb_ctrl_setreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *reclock)
3959 if (reclock == NULL) {
3963 data.dsize = strlen(reclock) + 1;
3964 data.dptr = discard_const(reclock);
3967 ret = ctdb_control(ctdb, destnode, 0,
3968 CTDB_CONTROL_SET_RECLOCK_FILE, 0, data,
3969 NULL, NULL, &res, &timeout, NULL);
3970 if (ret != 0 || res != 0) {
3971 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setreclock failed\n"));
3981 int ctdb_ctrl_stop_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3986 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STOP_NODE, 0, tdb_null,
3987 ctdb, NULL, &res, &timeout, NULL);
3988 if (ret != 0 || res != 0) {
3989 DEBUG(DEBUG_ERR,("Failed to stop node\n"));
3999 int ctdb_ctrl_continue_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
4003 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CONTINUE_NODE, 0, tdb_null,
4004 ctdb, NULL, NULL, &timeout, NULL);
4006 DEBUG(DEBUG_ERR,("Failed to continue node\n"));
4014 set the natgw state for a node
4016 int ctdb_ctrl_setnatgwstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t natgwstate)
4022 data.dsize = sizeof(natgwstate);
4023 data.dptr = (uint8_t *)&natgwstate;
4025 ret = ctdb_control(ctdb, destnode, 0,
4026 CTDB_CONTROL_SET_NATGWSTATE, 0, data,
4027 NULL, NULL, &res, &timeout, NULL);
4028 if (ret != 0 || res != 0) {
4029 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setnatgwstate failed\n"));
4037 set the lmaster role for a node
4039 int ctdb_ctrl_setlmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t lmasterrole)
4045 data.dsize = sizeof(lmasterrole);
4046 data.dptr = (uint8_t *)&lmasterrole;
4048 ret = ctdb_control(ctdb, destnode, 0,
4049 CTDB_CONTROL_SET_LMASTERROLE, 0, data,
4050 NULL, NULL, &res, &timeout, NULL);
4051 if (ret != 0 || res != 0) {
4052 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setlmasterrole failed\n"));
4060 set the recmaster role for a node
4062 int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmasterrole)
4068 data.dsize = sizeof(recmasterrole);
4069 data.dptr = (uint8_t *)&recmasterrole;
4071 ret = ctdb_control(ctdb, destnode, 0,
4072 CTDB_CONTROL_SET_RECMASTERROLE, 0, data,
4073 NULL, NULL, &res, &timeout, NULL);
4074 if (ret != 0 || res != 0) {
4075 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmasterrole failed\n"));
4082 /* enable an eventscript
4084 int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4090 data.dsize = strlen(script) + 1;
4091 data.dptr = discard_const(script);
4093 ret = ctdb_control(ctdb, destnode, 0,
4094 CTDB_CONTROL_ENABLE_SCRIPT, 0, data,
4095 NULL, NULL, &res, &timeout, NULL);
4096 if (ret != 0 || res != 0) {
4097 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
4104 /* disable an eventscript
4106 int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
4112 data.dsize = strlen(script) + 1;
4113 data.dptr = discard_const(script);
4115 ret = ctdb_control(ctdb, destnode, 0,
4116 CTDB_CONTROL_DISABLE_SCRIPT, 0, data,
4117 NULL, NULL, &res, &timeout, NULL);
4118 if (ret != 0 || res != 0) {
4119 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
4127 int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
4133 data.dsize = sizeof(*bantime);
4134 data.dptr = (uint8_t *)bantime;
4136 ret = ctdb_control(ctdb, destnode, 0,
4137 CTDB_CONTROL_SET_BAN_STATE, 0, data,
4138 NULL, NULL, &res, &timeout, NULL);
4139 if (ret != 0 || res != 0) {
4140 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4148 int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
4153 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4155 ret = ctdb_control(ctdb, destnode, 0,
4156 CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
4157 tmp_ctx, &outdata, &res, &timeout, NULL);
4158 if (ret != 0 || res != 0) {
4159 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4160 talloc_free(tmp_ctx);
4164 *bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
4165 talloc_free(tmp_ctx);
4171 int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
4176 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4178 data.dptr = (uint8_t*)db_prio;
4179 data.dsize = sizeof(*db_prio);
4181 ret = ctdb_control(ctdb, destnode, 0,
4182 CTDB_CONTROL_SET_DB_PRIORITY, 0, data,
4183 tmp_ctx, NULL, &res, &timeout, NULL);
4184 if (ret != 0 || res != 0) {
4185 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4186 talloc_free(tmp_ctx);
4190 talloc_free(tmp_ctx);
4195 int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
4200 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4202 data.dptr = (uint8_t*)&db_id;
4203 data.dsize = sizeof(db_id);
4205 ret = ctdb_control(ctdb, destnode, 0,
4206 CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
4207 tmp_ctx, NULL, &res, &timeout, NULL);
4208 if (ret != 0 || res < 0) {
4209 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4210 talloc_free(tmp_ctx);
4218 talloc_free(tmp_ctx);