4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
23 #include "lib/tdb/include/tdb.h"
24 #include "lib/util/dlinklist.h"
25 #include "lib/events/events.h"
26 #include "system/network.h"
27 #include "system/filesys.h"
28 #include "system/locale.h"
30 #include "../include/ctdb_private.h"
31 #include "lib/util/dlinklist.h"
/*
 * Builds a request header for client<->daemon packets.  The requested
 * length is raised to at least slength and padded using the
 * CTDB_DS_ALIGNMENT mask, the buffer is taken from mem_ctx and named
 * after `type`, and its first slength bytes are zeroed before the
 * operation, magic, version, source node and generation fields are set.
 * NOTE(review): parts of this definition (some parameters, the local
 * declarations, the allocation-failure branch and the return) are not
 * visible in this extract.
 */
36 allocate a packet for use in client<->daemon communication
38 struct ctdb_req_header *_ctdbd_allocate_pkt(struct ctdb_context *ctdb,
40 enum ctdb_operation operation,
41 size_t length, size_t slength,
45 struct ctdb_req_header *hdr;
47 length = MAX(length, slength);
48 size = (length+(CTDB_DS_ALIGNMENT-1)) & ~(CTDB_DS_ALIGNMENT-1);
50 hdr = (struct ctdb_req_header *)talloc_size(mem_ctx, size);
52 DEBUG(DEBUG_ERR,("Unable to allocate packet for operation %u of length %u\n",
53 operation, (unsigned)length));
56 talloc_set_name_const(hdr, type);
57 memset(hdr, 0, slength);
59 hdr->operation = operation;
60 hdr->ctdb_magic = CTDB_MAGIC;
61 hdr->ctdb_version = CTDB_VERSION;
62 hdr->srcnode = ctdb->pnn;
64 hdr->generation = ctdb->vnn_map->generation;
/*
 * Runs a registered call function against a record held on this node.
 * A ctdb_call_info is allocated under ctdb, given a private copy of the
 * record data, and handed to the handler whose id equals call->call_id.
 * Afterwards the last accessor is updated, the record is written back
 * with ctdb_ltdb_store(), and the reply data (re-parented onto `call`)
 * and status are copied into `call`.
 * NOTE(review): branch bodies, the guards around the store / reply
 * handling, cleanup and the return statements are not visible in this
 * extract.  The CTDB_NO_MEMORY() check after the record copy presumably
 * returns without releasing `c` - confirm.
 */
71 local version of ctdb_call
73 int ctdb_call_local(struct ctdb_db_context *ctdb_db, struct ctdb_call *call,
74 struct ctdb_ltdb_header *header, TALLOC_CTX *mem_ctx,
75 TDB_DATA *data, uint32_t caller)
77 struct ctdb_call_info *c;
78 struct ctdb_registered_call *fn;
79 struct ctdb_context *ctdb = ctdb_db->ctdb;
81 c = talloc(ctdb, struct ctdb_call_info);
82 CTDB_NO_MEMORY(ctdb, c);
85 c->call_data = &call->call_data;
86 c->record_data.dptr = talloc_memdup(c, data->dptr, data->dsize);
87 c->record_data.dsize = data->dsize;
88 CTDB_NO_MEMORY(ctdb, c->record_data.dptr);
93 for (fn=ctdb_db->calls;fn;fn=fn->next) {
94 if (fn->id == call->call_id) break;
97 ctdb_set_error(ctdb, "Unknown call id %u\n", call->call_id);
102 if (fn->fn(c) != 0) {
103 ctdb_set_error(ctdb, "ctdb_call %u failed\n", call->call_id);
108 if (header->laccessor != caller) {
111 header->laccessor = caller;
114 /* we need to force the record to be written out if this was a remote access,
115 so that the lacount is updated */
116 if (c->new_data == NULL && header->laccessor != ctdb->pnn) {
117 c->new_data = &c->record_data;
121 /* XXX check that we always have the lock here? */
122 if (ctdb_ltdb_store(ctdb_db, call->key, header, *c->new_data) != 0) {
123 ctdb_set_error(ctdb, "ctdb_call tdb_store failed\n");
130 call->reply_data = *c->reply_data;
132 talloc_steal(call, call->reply_data.dptr);
133 talloc_set_name_const(call->reply_data.dptr, __location__);
135 call->reply_data.dptr = NULL;
136 call->reply_data.dsize = 0;
138 call->status = c->status;
/*
 * Thin wrapper: hands the packet (hdr->length bytes starting at hdr) to
 * ctdb_queue_send() on the daemon queue and returns its result.
 */
147 queue a packet for sending from client to daemon
149 static int ctdb_client_queue_pkt(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
151 return ctdb_queue_send(ctdb->daemon.queue, (uint8_t *)hdr, hdr->length);
/*
 * Handles a CTDB_REPLY_CALL packet in the client.  The pending call state
 * is looked up by hdr->reqid; a reply whose reqid does not match the
 * state found is logged and dropped.  Otherwise the reply payload and
 * status are attached to state->call, the packet is re-parented onto
 * `state` (reply_data.dptr points into the packet), the state is marked
 * CTDB_CALL_DONE and the async callback, if any, is invoked.
 * NOTE(review): c->datalen is not checked against the packet length in
 * the lines visible here - confirm whether the caller guarantees it.
 */
156 called when a CTDB_REPLY_CALL packet comes in in the client
158 This packet comes in response to a CTDB_REQ_CALL request packet. It
159 contains any reply data from the call
161 static void ctdb_client_reply_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
163 struct ctdb_reply_call *c = (struct ctdb_reply_call *)hdr;
164 struct ctdb_client_call_state *state;
166 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_call_state);
168 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
172 if (hdr->reqid != state->reqid) {
173 /* we found a record but it was the wrong one */
174 DEBUG(DEBUG_ERR, ("Dropped client call reply with reqid:%u\n",hdr->reqid));
178 state->call->reply_data.dptr = c->data;
179 state->call->reply_data.dsize = c->datalen;
180 state->call->status = c->status;
182 talloc_steal(state, c);
184 state->state = CTDB_CALL_DONE;
186 if (state->async.fn) {
187 state->async.fn(state);
191 static void ctdb_client_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
/*
 * Read callback for the daemon socket queue.  `args` is the ctdb context
 * and `data`/`cnt` describe one received packet.  The packet is parked
 * under a temporary talloc context, sanity-checked (minimum size, length
 * field equal to cnt, magic, version) and dispatched on hdr->operation to
 * the call-reply, message or control-reply handler.  The temporary
 * context is freed at the end; a handler that wants to keep the packet
 * steals it first.
 * NOTE(review): the condition guarding the "Daemon has exited" branch and
 * the exits taken after each failed check are not visible in this extract.
 */
194 this is called in the client, when data comes in from the daemon
196 static void ctdb_client_read_cb(uint8_t *data, size_t cnt, void *args)
198 struct ctdb_context *ctdb = talloc_get_type(args, struct ctdb_context);
199 struct ctdb_req_header *hdr = (struct ctdb_req_header *)data;
202 /* place the packet as a child of a tmp_ctx. We then use
203 talloc_free() below to free it. If any of the calls want
204 to keep it, then they will steal it somewhere else, and the
205 talloc_free() will be a no-op */
206 tmp_ctx = talloc_new(ctdb);
207 talloc_steal(tmp_ctx, hdr);
210 DEBUG(DEBUG_INFO,("Daemon has exited - shutting down client\n"));
214 if (cnt < sizeof(*hdr)) {
215 DEBUG(DEBUG_CRIT,("Bad packet length %u in client\n", (unsigned)cnt));
218 if (cnt != hdr->length) {
219 ctdb_set_error(ctdb, "Bad header length %u expected %u in client\n",
220 (unsigned)hdr->length, (unsigned)cnt);
224 if (hdr->ctdb_magic != CTDB_MAGIC) {
225 ctdb_set_error(ctdb, "Non CTDB packet rejected in client\n");
229 if (hdr->ctdb_version != CTDB_VERSION) {
230 ctdb_set_error(ctdb, "Bad CTDB version 0x%x rejected in client\n", hdr->ctdb_version);
234 switch (hdr->operation) {
235 case CTDB_REPLY_CALL:
236 ctdb_client_reply_call(ctdb, hdr);
239 case CTDB_REQ_MESSAGE:
240 ctdb_request_message(ctdb, hdr);
243 case CTDB_REPLY_CONTROL:
244 ctdb_client_reply_control(ctdb, hdr);
248 DEBUG(DEBUG_CRIT,("bogus operation code:%u\n",hdr->operation));
252 talloc_free(tmp_ctx);
256 connect to a unix domain socket
258 int ctdb_socket_connect(struct ctdb_context *ctdb)
260 struct sockaddr_un addr;
262 memset(&addr, 0, sizeof(addr));
263 addr.sun_family = AF_UNIX;
264 strncpy(addr.sun_path, ctdb->daemon.name, sizeof(addr.sun_path));
266 ctdb->daemon.sd = socket(AF_UNIX, SOCK_STREAM, 0);
267 if (ctdb->daemon.sd == -1) {
268 DEBUG(DEBUG_ERR,(__location__ " Failed to open client socket. Errno:%s(%d)\n", strerror(errno), errno));
272 set_nonblocking(ctdb->daemon.sd);
273 set_close_on_exec(ctdb->daemon.sd);
275 if (connect(ctdb->daemon.sd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
276 close(ctdb->daemon.sd);
277 ctdb->daemon.sd = -1;
278 DEBUG(DEBUG_ERR,(__location__ " Failed to connect client socket to daemon. Errno:%s(%d)\n", strerror(errno), errno));
282 ctdb->daemon.queue = ctdb_queue_setup(ctdb, ctdb, ctdb->daemon.sd,
284 ctdb_client_read_cb, ctdb);
/*
 * Handle describing a locked record (the fetch-lock code below creates
 * it and ctdb_record_store() consumes it).  Holds the owning database and
 * the record's ltdb header.
 * NOTE(review): further members (the code below also uses h->key) and the
 * closing brace are not visible in this extract.
 */
289 struct ctdb_record_handle {
290 struct ctdb_db_context *ctdb_db;
293 struct ctdb_ltdb_header header;
/*
 * Waits for a client call to finish and copies its result into `call`.
 * The event loop is run one step at a time while state->state is below
 * CTDB_CALL_DONE.  On completion a non-empty reply is duplicated onto
 * state->ctdb_db (so it does not depend on the lifetime of `state`),
 * otherwise the reply is set to NULL/0; the status is copied as well.
 * NOTE(review): the talloc_memdup() result is not checked in the visible
 * lines, so on allocation failure reply_data.dptr would be NULL with a
 * non-zero dsize.  Any handling of a NULL `state`, the cleanup of `state`
 * and the return statements are not visible in this extract.
 */
298 make a recv call to the local ctdb daemon - called from client context
300 This is called when the program wants to wait for a ctdb_call to complete and get the
301 results. This call will block unless the call has already completed.
303 int ctdb_call_recv(struct ctdb_client_call_state *state, struct ctdb_call *call)
309 while (state->state < CTDB_CALL_DONE) {
310 event_loop_once(state->ctdb_db->ctdb->ev);
312 if (state->state != CTDB_CALL_DONE) {
313 DEBUG(DEBUG_ERR,(__location__ " ctdb_call_recv failed\n"));
318 if (state->call->reply_data.dsize) {
319 call->reply_data.dptr = talloc_memdup(state->ctdb_db,
320 state->call->reply_data.dptr,
321 state->call->reply_data.dsize);
322 call->reply_data.dsize = state->call->reply_data.dsize;
324 call->reply_data.dptr = NULL;
325 call->reply_data.dsize = 0;
327 call->status = state->call->status;
/*
 * talloc destructor for a client call state (installed in
 * ctdb_call_send): releases the request id registered for this call so
 * a late reply can no longer be matched to it.
 */
337 destroy a ctdb_call in client
339 static int ctdb_client_call_destructor(struct ctdb_client_call_state *state)
341 ctdb_reqid_remove(state->ctdb_db->ctdb, state->reqid);
/*
 * Executes a call locally but returns it wrapped in a
 * ctdb_client_call_state, so the caller can treat it like a remote call.
 * The state (and a private copy of *call) is allocated under ctdb_db,
 * the record data is re-parented onto the state, the state is marked
 * CTDB_CALL_DONE up front and ctdb_call_local() is run with this node's
 * pnn as the caller.
 * NOTE(review): one parameter line, the local declarations and the
 * return are not visible in this extract; the value stored in `ret` is
 * not used in any visible line - confirm whether failures are reported.
 */
346 construct an event driven local ctdb_call
348 this is used so that locally processed ctdb_call requests are processed
349 in an event driven manner
351 static struct ctdb_client_call_state *ctdb_client_call_local_send(struct ctdb_db_context *ctdb_db,
352 struct ctdb_call *call,
353 struct ctdb_ltdb_header *header,
356 struct ctdb_client_call_state *state;
357 struct ctdb_context *ctdb = ctdb_db->ctdb;
360 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
361 CTDB_NO_MEMORY_NULL(ctdb, state);
362 state->call = talloc_zero(state, struct ctdb_call);
363 CTDB_NO_MEMORY_NULL(ctdb, state->call);
365 talloc_steal(state, data->dptr);
367 state->state = CTDB_CALL_DONE;
368 *(state->call) = *call;
369 state->ctdb_db = ctdb_db;
371 ret = ctdb_call_local(ctdb_db, state->call, header, state, data, ctdb->pnn);
/*
 * Asynchronous start of a ctdb call from client context.
 * Steps visible here:
 *  - connect the daemon socket on first use;
 *  - take the chain lock for call->key and fetch the local record;
 *  - if the fetch succeeded and this node is the dmaster, run the call
 *    locally via ctdb_client_call_local_send() and unlock;
 *  - otherwise unlock, allocate a call state plus a CTDB_REQ_CALL packet
 *    (key followed by call data), register a request id and destructor,
 *    point state->call's key/call_data into the packet, mark the state
 *    CTDB_CALL_WAIT and queue the packet to the daemon.
 * NOTE(review): the results of ctdb_socket_connect() and
 * ctdb_client_queue_pkt() are not checked in the visible lines.  The
 * failure branches for state->call and for the packet allocation appear
 * to return without freeing `state` (the gaps in the original numbering
 * leave no room for a free) - confirm.  Local declarations, several
 * branch bodies and the returns are not visible in this extract.
 */
377 make a ctdb call to the local daemon - async send. Called from client context.
379 This constructs a ctdb_call request and queues it for processing.
380 This call never blocks.
382 struct ctdb_client_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db,
383 struct ctdb_call *call)
385 struct ctdb_client_call_state *state;
386 struct ctdb_context *ctdb = ctdb_db->ctdb;
387 struct ctdb_ltdb_header header;
391 struct ctdb_req_call *c;
393 /* if the domain socket is not yet open, open it */
394 if (ctdb->daemon.sd==-1) {
395 ctdb_socket_connect(ctdb);
398 ret = ctdb_ltdb_lock(ctdb_db, call->key);
400 DEBUG(DEBUG_ERR,(__location__ " Failed to get chainlock\n"));
404 ret = ctdb_ltdb_fetch(ctdb_db, call->key, &header, ctdb_db, &data);
406 if (ret == 0 && header.dmaster == ctdb->pnn) {
407 state = ctdb_client_call_local_send(ctdb_db, call, &header, &data);
408 talloc_free(data.dptr);
409 ctdb_ltdb_unlock(ctdb_db, call->key);
413 ctdb_ltdb_unlock(ctdb_db, call->key);
414 talloc_free(data.dptr);
416 state = talloc_zero(ctdb_db, struct ctdb_client_call_state);
418 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state\n"));
421 state->call = talloc_zero(state, struct ctdb_call);
422 if (state->call == NULL) {
423 DEBUG(DEBUG_ERR, (__location__ " failed to allocate state->call\n"));
427 len = offsetof(struct ctdb_req_call, data) + call->key.dsize + call->call_data.dsize;
428 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CALL, len, struct ctdb_req_call);
430 DEBUG(DEBUG_ERR, (__location__ " failed to allocate packet\n"));
434 state->reqid = ctdb_reqid_new(ctdb, state);
435 state->ctdb_db = ctdb_db;
436 talloc_set_destructor(state, ctdb_client_call_destructor);
438 c->hdr.reqid = state->reqid;
439 c->flags = call->flags;
440 c->db_id = ctdb_db->db_id;
441 c->callid = call->call_id;
443 c->keylen = call->key.dsize;
444 c->calldatalen = call->call_data.dsize;
445 memcpy(&c->data[0], call->key.dptr, call->key.dsize);
446 memcpy(&c->data[call->key.dsize],
447 call->call_data.dptr, call->call_data.dsize);
448 *(state->call) = *call;
449 state->call->call_data.dptr = &c->data[call->key.dsize];
450 state->call->key.dptr = &c->data[0];
452 state->state = CTDB_CALL_WAIT;
455 ctdb_client_queue_pkt(ctdb, &c->hdr);
/*
 * Synchronous call: starts the call with ctdb_call_send() and returns
 * whatever ctdb_call_recv() reports for the resulting state.
 * NOTE(review): ctdb_call_send() can return NULL; this relies on
 * ctdb_call_recv() coping with that - its handling is not visible in
 * this extract.
 */
462 full ctdb_call. Equivalent to a ctdb_call_send() followed by a ctdb_call_recv()
464 int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call)
466 struct ctdb_client_call_state *state;
468 state = ctdb_call_send(ctdb_db, call);
469 return ctdb_call_recv(state, call);
/*
 * Registers `srvid` with the local daemon (CTDB_CONTROL_REGISTER_SRVID
 * sent to CTDB_CURRENT_NODE, no timeout) and, if that succeeds, installs
 * `handler` in this client's own ctdb structure.  Returns the result of
 * ctdb_register_message_handler() on that path.
 * NOTE(review): the private_data parameter line, the local declarations
 * and the failure return are not visible in this extract.
 */
474 tell the daemon what messaging srvid we will use, and register the message
475 handler function in the client
477 int ctdb_set_message_handler(struct ctdb_context *ctdb, uint64_t srvid,
478 ctdb_message_fn_t handler,
485 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_REGISTER_SRVID, 0,
486 tdb_null, NULL, NULL, &status, NULL, NULL);
487 if (res != 0 || status != 0) {
488 DEBUG(DEBUG_ERR,("Failed to register srvid %llu\n", (unsigned long long)srvid));
492 /* also need to register the handler with our own ctdb structure */
493 return ctdb_register_message_handler(ctdb, ctdb, srvid, handler, private_data);
/*
 * Deregisters `srvid` with the local daemon
 * (CTDB_CONTROL_DEREGISTER_SRVID sent to CTDB_CURRENT_NODE, no timeout)
 * and then removes the matching handler from this client's own ctdb
 * structure.
 * NOTE(review): the local declarations and the return statements are not
 * visible in this extract.
 */
497 tell the daemon we no longer want a srvid
499 int ctdb_remove_message_handler(struct ctdb_context *ctdb, uint64_t srvid, void *private_data)
504 res = ctdb_control(ctdb, CTDB_CURRENT_NODE, srvid, CTDB_CONTROL_DEREGISTER_SRVID, 0,
505 tdb_null, NULL, NULL, &status, NULL, NULL);
506 if (res != 0 || status != 0) {
507 DEBUG(DEBUG_ERR,("Failed to deregister srvid %llu\n", (unsigned long long)srvid));
511 /* also need to deregister the handler from our own ctdb structure */
512 ctdb_deregister_message_handler(ctdb, srvid, private_data);
518 send a message - from client context
520 int ctdb_send_message(struct ctdb_context *ctdb, uint32_t pnn,
521 uint64_t srvid, TDB_DATA data)
523 struct ctdb_req_message *r;
526 len = offsetof(struct ctdb_req_message, data) + data.dsize;
527 r = ctdbd_allocate_pkt(ctdb, ctdb, CTDB_REQ_MESSAGE,
528 len, struct ctdb_req_message);
529 CTDB_NO_MEMORY(ctdb, r);
531 r->hdr.destnode = pnn;
533 r->datalen = data.dsize;
534 memcpy(&r->data[0], data.dptr, data.dsize);
536 res = ctdb_client_queue_pkt(ctdb, &r->hdr);
/*
 * talloc destructor for a record handle (installed by ctdb_fetch_lock):
 * freeing the handle drops the chain lock held on h->key.
 */
547 cancel a ctdb_fetch_lock operation, releasing the lock
549 static int fetch_lock_destructor(struct ctdb_record_handle *h)
551 ctdb_ltdb_unlock(h->ctdb_db, h->key);
/*
 * Asks the daemon to migrate a record to this node by issuing a
 * CTDB_NULL_FUNC call flagged CTDB_IMMEDIATE_MIGRATION and waiting for it
 * to complete; returns the ctdb_call() result.
 * NOTE(review): the lines that initialise the rest of `call` (including
 * where `key` is stored) are not visible in this extract.
 */
556 force the migration of a record to this node
558 static int ctdb_client_force_migration(struct ctdb_db_context *ctdb_db, TDB_DATA key)
560 struct ctdb_call call;
562 call.call_id = CTDB_NULL_FUNC;
564 call.flags = CTDB_IMMEDIATE_MIGRATION;
565 return ctdb_call(ctdb_db, &call);
/*
 * Fetches a record with its chain lock held and returns a handle for it.
 * The handle (with a private copy of the key) is allocated under mem_ctx
 * and gets fetch_lock_destructor installed once the lock is taken, so
 * freeing the handle releases the lock.  After the local fetch, if the
 * fetch failed or this node is not the dmaster, the lock is dropped and
 * a migration is requested through ctdb_client_force_migration().
 * With CTDB_FLAG_TORTURE set the dmaster can be overwritten with -1 to
 * exercise the remote path.
 * NOTE(review): the retry jump, part of the torture condition, the
 * failure branches and the returns are not visible in this extract.
 */
569 get a lock on a record, and return the records data. Blocks until it gets the lock
571 struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
572 TDB_DATA key, TDB_DATA *data)
575 struct ctdb_record_handle *h;
578 procedure is as follows:
580 1) get the chain lock.
581 2) check if we are dmaster
582 3) if we are the dmaster then return handle
583 4) if not dmaster then ask ctdb daemon to make us dmaster, and wait for
585 5) when we get the reply, goto (1)
588 h = talloc_zero(mem_ctx, struct ctdb_record_handle);
593 h->ctdb_db = ctdb_db;
595 h->key.dptr = talloc_memdup(h, key.dptr, key.dsize);
596 if (h->key.dptr == NULL) {
602 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: key=%*.*s\n", (int)key.dsize, (int)key.dsize,
603 (const char *)key.dptr));
606 /* step 1 - get the chain lock */
607 ret = ctdb_ltdb_lock(ctdb_db, key);
609 DEBUG(DEBUG_ERR, (__location__ " failed to lock ltdb record\n"));
614 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: got chain lock\n"));
616 talloc_set_destructor(h, fetch_lock_destructor);
618 ret = ctdb_ltdb_fetch(ctdb_db, key, &h->header, h, data);
620 /* when torturing, ensure we test the remote path */
621 if ((ctdb_db->ctdb->flags & CTDB_FLAG_TORTURE) &&
623 h->header.dmaster = (uint32_t)-1;
627 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: done local fetch\n"));
629 if (ret != 0 || h->header.dmaster != ctdb_db->ctdb->pnn) {
630 ctdb_ltdb_unlock(ctdb_db, key);
631 ret = ctdb_client_force_migration(ctdb_db, key);
633 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: force_migration failed\n"));
640 DEBUG(DEBUG_DEBUG,("ctdb_fetch_lock: we are dmaster - done\n"));
/*
 * Writes `data` to the record described by handle `h` using
 * ctdb_ltdb_store() with the header cached in the handle.  Persistent
 * databases are refused with an error message.
 * NOTE(review): the return taken in the persistent-db branch is not
 * visible in this extract.
 */
645 store some data to the record that was locked with ctdb_fetch_lock()
647 int ctdb_record_store(struct ctdb_record_handle *h, TDB_DATA data)
649 if (h->ctdb_db->persistent) {
650 DEBUG(DEBUG_ERR, (__location__ " ctdb_record_store prohibited for persistent dbs\n"));
654 return ctdb_ltdb_store(h->ctdb_db, h->key, &h->header, data);
658 non-locking fetch of a record
660 int ctdb_fetch(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx,
661 TDB_DATA key, TDB_DATA *data)
663 struct ctdb_call call;
666 call.call_id = CTDB_FETCH_FUNC;
667 call.call_data.dptr = NULL;
668 call.call_data.dsize = 0;
670 ret = ctdb_call(ctdb_db, &call);
673 *data = call.reply_data;
674 talloc_steal(mem_ctx, data->dptr);
/*
 * Timed-event handler used to deliver a control result to the user's
 * callback.  `private_data` is the control state; it is moved under a
 * temporary context, ctdb_control_recv() is run on it, and the temporary
 * context is freed afterwards.
 * NOTE(review): the remaining ctdb_control_recv() arguments and the use
 * of `ret` are not visible in this extract.
 */
683 called when a control completes or timesout to invoke the callback
684 function the user provided
686 static void invoke_control_callback(struct event_context *ev, struct timed_event *te,
687 struct timeval t, void *private_data)
689 struct ctdb_client_control_state *state;
690 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
693 state = talloc_get_type(private_data, struct ctdb_client_control_state);
694 talloc_steal(tmp_ctx, state);
696 ret = ctdb_control_recv(state->ctdb, state, state,
701 talloc_free(tmp_ctx);
/*
 * Handles a CTDB_REPLY_CONTROL packet in the client.  The pending control
 * state is looked up by hdr->reqid; a reply whose reqid does not match
 * the state found is logged and dropped.  Otherwise outdata and status
 * are taken from the packet, an error message is duplicated from the
 * bytes following the data, the packet is re-parented onto `state`
 * (outdata points into it) and the state is marked CTDB_CONTROL_DONE.
 * If an async callback is set, invoke_control_callback is scheduled as a
 * zero-delay timed event rather than being called directly.
 * NOTE(review): the length argument of talloc_strndup() and the exits of
 * the error branches are not visible in this extract.
 */
705 called when a CTDB_REPLY_CONTROL packet comes in in the client
707 This packet comes in response to a CTDB_REQ_CONTROL request packet. It
708 contains any reply data from the control
710 static void ctdb_client_reply_control(struct ctdb_context *ctdb,
711 struct ctdb_req_header *hdr)
713 struct ctdb_reply_control *c = (struct ctdb_reply_control *)hdr;
714 struct ctdb_client_control_state *state;
716 state = ctdb_reqid_find(ctdb, hdr->reqid, struct ctdb_client_control_state);
718 DEBUG(DEBUG_ERR,(__location__ " reqid %u not found\n", hdr->reqid));
722 if (hdr->reqid != state->reqid) {
723 /* we found a record but it was the wrong one */
724 DEBUG(DEBUG_ERR, ("Dropped orphaned reply control with reqid:%u\n",hdr->reqid));
728 state->outdata.dptr = c->data;
729 state->outdata.dsize = c->datalen;
730 state->status = c->status;
732 state->errormsg = talloc_strndup(state,
733 (char *)&c->data[c->datalen],
737 /* state->outdata now uses resources from c so we dont want c
738 to just dissappear from under us while state is still alive
740 talloc_steal(state, c);
742 state->state = CTDB_CONTROL_DONE;
744 /* if we had a callback registered for this control, pull the response
745 and call the callback.
747 if (state->async.fn) {
748 event_add_timed(ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
/*
 * talloc destructor for a client control state (installed in
 * ctdb_control_send): releases the request id registered for the control.
 */
754 destroy a ctdb_control in client
756 static int ctdb_control_destructor(struct ctdb_client_control_state *state)
758 ctdb_reqid_remove(state->ctdb, state->reqid);
/*
 * Timed-event handler armed by ctdb_control_send() when a timeout is
 * given.  Logs the reqid, opcode and destination of the control, marks
 * the state CTDB_CONTROL_TIMEOUT and, if an async callback is set,
 * schedules invoke_control_callback as a zero-delay timed event.
 */
763 /* time out handler for ctdb_control */
764 static void control_timeout_func(struct event_context *ev, struct timed_event *te,
765 struct timeval t, void *private_data)
767 struct ctdb_client_control_state *state = talloc_get_type(private_data, struct ctdb_client_control_state);
769 DEBUG(DEBUG_ERR,(__location__ " control timed out. reqid:%u opcode:%u "
770 "dstnode:%u\n", state->reqid, state->c->opcode,
771 state->c->hdr.destnode));
773 state->state = CTDB_CONTROL_TIMEOUT;
775 /* if we had a callback registered for this control, pull the response
776 and call the callback.
778 if (state->async.fn) {
779 event_add_timed(state->ctdb->ev, state, timeval_zero(), invoke_control_callback, state);
/*
 * Asynchronous start of a control request.
 * Steps visible here: connect the daemon socket on first use; allocate a
 * control state under mem_ctx with a fresh request id, state
 * CTDB_CONTROL_WAIT and ctdb_control_destructor installed; build a
 * CTDB_REQ_CONTROL packet as a child of the state and copy `data` into
 * it; arm control_timeout_func when a non-zero timeout is supplied; queue
 * the packet to the daemon.  CTDB_CTRL_FLAG_NOREPLY is handled specially
 * after queueing.
 * NOTE(review): two parameter lines, several field assignments, the
 * handling of a queueing failure, the NOREPLY branch body and the return
 * are not visible in this extract.  CTDB_NO_MEMORY_NULL(ctdb, c)
 * presumably returns without freeing `state` - confirm.
 */
783 /* async version of send control request */
784 struct ctdb_client_control_state *ctdb_control_send(struct ctdb_context *ctdb,
785 uint32_t destnode, uint64_t srvid,
786 uint32_t opcode, uint32_t flags, TDB_DATA data,
788 struct timeval *timeout,
791 struct ctdb_client_control_state *state;
793 struct ctdb_req_control *c;
800 /* if the domain socket is not yet open, open it */
801 if (ctdb->daemon.sd==-1) {
802 ctdb_socket_connect(ctdb);
805 state = talloc_zero(mem_ctx, struct ctdb_client_control_state);
806 CTDB_NO_MEMORY_NULL(ctdb, state);
809 state->reqid = ctdb_reqid_new(ctdb, state);
810 state->state = CTDB_CONTROL_WAIT;
811 state->errormsg = NULL;
813 talloc_set_destructor(state, ctdb_control_destructor);
815 len = offsetof(struct ctdb_req_control, data) + data.dsize;
816 c = ctdbd_allocate_pkt(ctdb, state, CTDB_REQ_CONTROL,
817 len, struct ctdb_req_control);
819 CTDB_NO_MEMORY_NULL(ctdb, c);
820 c->hdr.reqid = state->reqid;
821 c->hdr.destnode = destnode;
826 c->datalen = data.dsize;
828 memcpy(&c->data[0], data.dptr, data.dsize);
832 if (timeout && !timeval_is_zero(timeout)) {
833 event_add_timed(ctdb->ev, state, *timeout, control_timeout_func, state);
836 ret = ctdb_client_queue_pkt(ctdb, &(c->hdr));
842 if (flags & CTDB_CTRL_FLAG_NOREPLY) {
/*
 * Waits for a control started with ctdb_control_send() and collects its
 * result.  The state is parked under a temporary context, the event loop
 * is run one step at a time while the state is CTDB_CONTROL_WAIT, and
 * then one of three outcomes is handled:
 *  - not CTDB_CONTROL_DONE (e.g. timeout): log and fail;
 *  - an error message was returned: log it and move it to *errormsg;
 *  - otherwise copy outdata (duplicated onto mem_ctx) and the status.
 * In each case the async callback, if any, is invoked and the temporary
 * context (and with it the state) is freed.
 * NOTE(review): the initialisation of the output parameters, the guards
 * on outdata/status/errormsg and the return statements are not visible
 * in this extract.
 */
851 /* async version of receive control reply */
852 int ctdb_control_recv(struct ctdb_context *ctdb,
853 struct ctdb_client_control_state *state,
855 TDB_DATA *outdata, int32_t *status, char **errormsg)
859 if (status != NULL) {
862 if (errormsg != NULL) {
870 /* prevent double free of state */
871 tmp_ctx = talloc_new(ctdb);
872 talloc_steal(tmp_ctx, state);
874 /* loop one event at a time until we either timeout or the control
877 while (state->state == CTDB_CONTROL_WAIT) {
878 event_loop_once(ctdb->ev);
881 if (state->state != CTDB_CONTROL_DONE) {
882 DEBUG(DEBUG_ERR,(__location__ " ctdb_control_recv failed\n"));
883 if (state->async.fn) {
884 state->async.fn(state);
886 talloc_free(tmp_ctx);
890 if (state->errormsg) {
891 DEBUG(DEBUG_ERR,("ctdb_control error: '%s'\n", state->errormsg));
893 (*errormsg) = talloc_move(mem_ctx, &state->errormsg);
895 if (state->async.fn) {
896 state->async.fn(state);
898 talloc_free(tmp_ctx);
903 *outdata = state->outdata;
904 outdata->dptr = talloc_memdup(mem_ctx, outdata->dptr, outdata->dsize);
908 *status = state->status;
911 if (state->async.fn) {
912 state->async.fn(state);
915 talloc_free(tmp_ctx);
/*
 * Synchronous control: ctdb_control_send() followed by
 * ctdb_control_recv() on the returned state, with outdata/status handed
 * straight through to the caller.
 * NOTE(review): the final parameter line and the trailing arguments of
 * both calls are not visible in this extract.
 */
922 send a ctdb control message
923 timeout specifies how long we should wait for a reply.
924 if timeout is NULL we wait indefinitely
926 int ctdb_control(struct ctdb_context *ctdb, uint32_t destnode, uint64_t srvid,
927 uint32_t opcode, uint32_t flags, TDB_DATA data,
928 TALLOC_CTX *mem_ctx, TDB_DATA *outdata, int32_t *status,
929 struct timeval *timeout,
932 struct ctdb_client_control_state *state;
934 state = ctdb_control_send(ctdb, destnode, srvid, opcode,
935 flags, data, mem_ctx,
937 return ctdb_control_recv(ctdb, state, mem_ctx, outdata, status,
/*
 * Sends CTDB_CONTROL_PROCESS_EXISTS for `pid` to `destnode` (no timeout)
 * with the raw pid_t bytes as payload.
 * NOTE(review): the local declarations, the failure condition and the
 * value returned are not visible in this extract.
 */
945 a process exists call. Returns 0 if process exists, -1 otherwise
947 int ctdb_ctrl_process_exists(struct ctdb_context *ctdb, uint32_t destnode, pid_t pid)
953 data.dptr = (uint8_t*)&pid;
954 data.dsize = sizeof(pid);
956 ret = ctdb_control(ctdb, destnode, 0,
957 CTDB_CONTROL_PROCESS_EXISTS, 0, data,
958 NULL, NULL, &status, NULL, NULL);
960 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for process_exists failed\n"));
968 get remote statistics
970 int ctdb_ctrl_statistics(struct ctdb_context *ctdb, uint32_t destnode, struct ctdb_statistics *status)
976 ret = ctdb_control(ctdb, destnode, 0,
977 CTDB_CONTROL_STATISTICS, 0, tdb_null,
978 ctdb, &data, &res, NULL, NULL);
979 if (ret != 0 || res != 0) {
980 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for statistics failed\n"));
984 if (data.dsize != sizeof(struct ctdb_statistics)) {
985 DEBUG(DEBUG_ERR,(__location__ " Wrong statistics size %u - expected %u\n",
986 (unsigned)data.dsize, (unsigned)sizeof(struct ctdb_statistics)));
990 *status = *(struct ctdb_statistics *)data.dptr;
991 talloc_free(data.dptr);
/*
 * Sends CTDB_CONTROL_SHUTDOWN to `destnode` using the asynchronous send
 * only (no mem_ctx, caller-supplied timeout); no reply is collected in
 * the visible lines.  A NULL state from the send is logged as a failure.
 * NOTE(review): the return statements are not visible in this extract.
 */
997 shutdown a remote ctdb node
999 int ctdb_ctrl_shutdown(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1001 struct ctdb_client_control_state *state;
1003 state = ctdb_control_send(ctdb, destnode, 0,
1004 CTDB_CONTROL_SHUTDOWN, 0, tdb_null,
1005 NULL, &timeout, NULL);
1006 if (state == NULL) {
1007 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for shutdown failed\n"));
/*
 * Fetches the vnn map of `destnode` (CTDB_CONTROL_GETVNNMAP) and converts
 * the wire format into a freshly allocated struct ctdb_vnn_map under
 * mem_ctx, returned through *vnnmap.  The reply must be at least a wire
 * header long and exactly header + size * sizeof(uint32_t) bytes.  The
 * reply buffer is freed once the entries have been copied.
 * NOTE(review): in the visible lines the size-mismatch branch and the two
 * CTDB_NO_MEMORY() checks do not release outdata.dptr (nor a partially
 * built *vnnmap) - confirm.  The local declarations and the returns are
 * not visible in this extract.
 */
1015 get vnn map from a remote node
1017 int ctdb_ctrl_getvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_vnn_map **vnnmap)
1022 struct ctdb_vnn_map_wire *map;
1024 ret = ctdb_control(ctdb, destnode, 0,
1025 CTDB_CONTROL_GETVNNMAP, 0, tdb_null,
1026 mem_ctx, &outdata, &res, &timeout, NULL);
1027 if (ret != 0 || res != 0) {
1028 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getvnnmap failed\n"));
1032 map = (struct ctdb_vnn_map_wire *)outdata.dptr;
1033 if (outdata.dsize < offsetof(struct ctdb_vnn_map_wire, map) ||
1034 outdata.dsize != map->size*sizeof(uint32_t) + offsetof(struct ctdb_vnn_map_wire, map)) {
1035 DEBUG(DEBUG_ERR,("Bad vnn map size received in ctdb_ctrl_getvnnmap\n"));
1039 (*vnnmap) = talloc(mem_ctx, struct ctdb_vnn_map);
1040 CTDB_NO_MEMORY(ctdb, *vnnmap);
1041 (*vnnmap)->generation = map->generation;
1042 (*vnnmap)->size = map->size;
1043 (*vnnmap)->map = talloc_array(*vnnmap, uint32_t, map->size);
1045 CTDB_NO_MEMORY(ctdb, (*vnnmap)->map);
1046 memcpy((*vnnmap)->map, map->map, sizeof(uint32_t)*map->size);
1047 talloc_free(outdata.dptr);
/*
 * Async send half of "get recovery mode": issues
 * CTDB_CONTROL_GET_RECMODE to `destnode` with the given timeout and
 * returns the control state allocated under mem_ctx (NULL on failure, as
 * returned by ctdb_control_send()).
 */
1054 get the recovery mode of a remote node
1056 struct ctdb_client_control_state *
1057 ctdb_ctrl_getrecmode_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
1059 return ctdb_control_send(ctdb, destnode, 0,
1060 CTDB_CONTROL_GET_RECMODE, 0, tdb_null,
1061 mem_ctx, &timeout, NULL);
/*
 * Async receive half of "get recovery mode": waits for the control and
 * stores its status value, cast to uint32_t, in *recmode.
 * NOTE(review): the local declarations, the failure condition, any guard
 * on `recmode` and the returns are not visible in this extract.
 */
1064 int ctdb_ctrl_getrecmode_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmode)
1069 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1071 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmode_recv failed\n"));
1076 *recmode = (uint32_t)res;
/*
 * Synchronous "get recovery mode": the _send half followed immediately by
 * the _recv half; returns the _recv result.
 */
1082 int ctdb_ctrl_getrecmode(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmode)
1084 struct ctdb_client_control_state *state;
1086 state = ctdb_ctrl_getrecmode_send(ctdb, mem_ctx, timeout, destnode);
1087 return ctdb_ctrl_getrecmode_recv(ctdb, mem_ctx, state, recmode);
/*
 * Sets the recovery mode of `destnode`: sends CTDB_CONTROL_SET_RECMODE
 * with the 32-bit `recmode` value as payload and treats a non-zero
 * result or status as failure.
 * NOTE(review): the local declarations and the returns are not visible
 * in this extract.
 */
1094 set the recovery mode of a remote node
1096 int ctdb_ctrl_setrecmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmode)
1102 data.dsize = sizeof(uint32_t);
1103 data.dptr = (unsigned char *)&recmode;
1105 ret = ctdb_control(ctdb, destnode, 0,
1106 CTDB_CONTROL_SET_RECMODE, 0, data,
1107 NULL, NULL, &res, &timeout, NULL);
1108 if (ret != 0 || res != 0) {
1109 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmode failed\n"));
/*
 * Async send half of "get recovery master": issues
 * CTDB_CONTROL_GET_RECMASTER to `destnode` with the given timeout and
 * returns the control state allocated under mem_ctx (NULL on failure, as
 * returned by ctdb_control_send()).
 */
1119 get the recovery master of a remote node
1121 struct ctdb_client_control_state *
1122 ctdb_ctrl_getrecmaster_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx,
1123 struct timeval timeout, uint32_t destnode)
1125 return ctdb_control_send(ctdb, destnode, 0,
1126 CTDB_CONTROL_GET_RECMASTER, 0, tdb_null,
1127 mem_ctx, &timeout, NULL);
/*
 * Async receive half of "get recovery master": waits for the control and
 * stores its status value, cast to uint32_t, in *recmaster.
 * NOTE(review): the local declarations, the failure condition, any guard
 * on `recmaster` and the returns are not visible in this extract.
 */
1130 int ctdb_ctrl_getrecmaster_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *recmaster)
1135 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1137 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getrecmaster_recv failed\n"));
1142 *recmaster = (uint32_t)res;
/*
 * Synchronous "get recovery master": the _send half followed immediately
 * by the _recv half; returns the _recv result.
 */
1148 int ctdb_ctrl_getrecmaster(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t *recmaster)
1150 struct ctdb_client_control_state *state;
1152 state = ctdb_ctrl_getrecmaster_send(ctdb, mem_ctx, timeout, destnode);
1153 return ctdb_ctrl_getrecmaster_recv(ctdb, mem_ctx, state, recmaster);
/*
 * Sets the recovery master of `destnode`: sends
 * CTDB_CONTROL_SET_RECMASTER with the 32-bit `recmaster` value as payload
 * and treats a non-zero result or status as failure.
 * NOTE(review): the local declarations and the returns are not visible
 * in this extract.
 */
1158 set the recovery master of a remote node
1160 int ctdb_ctrl_setrecmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmaster)
1167 data.dsize = sizeof(uint32_t);
1168 data.dptr = (unsigned char *)&recmaster;
1170 ret = ctdb_control(ctdb, destnode, 0,
1171 CTDB_CONTROL_SET_RECMASTER, 0, data,
1172 NULL, NULL, &res, &timeout, NULL);
1173 if (ret != 0 || res != 0) {
1174 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmaster failed\n"));
1183 get a list of databases off a remote node
1185 int ctdb_ctrl_getdbmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1186 TALLOC_CTX *mem_ctx, struct ctdb_dbid_map **dbmap)
1192 ret = ctdb_control(ctdb, destnode, 0,
1193 CTDB_CONTROL_GET_DBMAP, 0, tdb_null,
1194 mem_ctx, &outdata, &res, &timeout, NULL);
1195 if (ret != 0 || res != 0) {
1196 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getdbmap failed ret:%d res:%d\n", ret, res));
1200 *dbmap = (struct ctdb_dbid_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1201 talloc_free(outdata.dptr);
1207 get a list of nodes (vnn and flags ) from a remote node
1209 int ctdb_ctrl_getnodemap(struct ctdb_context *ctdb,
1210 struct timeval timeout, uint32_t destnode,
1211 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1217 ret = ctdb_control(ctdb, destnode, 0,
1218 CTDB_CONTROL_GET_NODEMAP, 0, tdb_null,
1219 mem_ctx, &outdata, &res, &timeout, NULL);
1220 if (ret == 0 && res == -1 && outdata.dsize == 0) {
1221 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed, falling back to ipv4-only control\n"));
1222 return ctdb_ctrl_getnodemapv4(ctdb, timeout, destnode, mem_ctx, nodemap);
1224 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1225 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodes failed ret:%d res:%d\n", ret, res));
1229 *nodemap = (struct ctdb_node_map *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
1230 talloc_free(outdata.dptr);
1236 old style ipv4-only get a list of nodes (vnn and flags ) from a remote node
1238 int ctdb_ctrl_getnodemapv4(struct ctdb_context *ctdb,
1239 struct timeval timeout, uint32_t destnode,
1240 TALLOC_CTX *mem_ctx, struct ctdb_node_map **nodemap)
1244 struct ctdb_node_mapv4 *nodemapv4;
1247 ret = ctdb_control(ctdb, destnode, 0,
1248 CTDB_CONTROL_GET_NODEMAPv4, 0, tdb_null,
1249 mem_ctx, &outdata, &res, &timeout, NULL);
1250 if (ret != 0 || res != 0 || outdata.dsize == 0) {
1251 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getnodesv4 failed ret:%d res:%d\n", ret, res));
1255 nodemapv4 = (struct ctdb_node_mapv4 *)outdata.dptr;
1257 len = offsetof(struct ctdb_node_map, nodes) + nodemapv4->num*sizeof(struct ctdb_node_and_flags);
1258 (*nodemap) = talloc_zero_size(mem_ctx, len);
1259 CTDB_NO_MEMORY(ctdb, (*nodemap));
1261 (*nodemap)->num = nodemapv4->num;
1262 for (i=0; i<nodemapv4->num; i++) {
1263 (*nodemap)->nodes[i].pnn = nodemapv4->nodes[i].pnn;
1264 (*nodemap)->nodes[i].flags = nodemapv4->nodes[i].flags;
1265 (*nodemap)->nodes[i].addr.ip = nodemapv4->nodes[i].sin;
1266 (*nodemap)->nodes[i].addr.sa.sa_family = AF_INET;
1269 talloc_free(outdata.dptr);
/*
 * Sends CTDB_CONTROL_RELOAD_NODES_FILE to `destnode` with the given
 * timeout and treats a non-zero result or status as failure.
 * NOTE(review): the local declarations and the returns are not visible
 * in this extract.
 */
1275 drop the transport, reload the nodes file and restart the transport
1277 int ctdb_ctrl_reload_nodes_file(struct ctdb_context *ctdb,
1278 struct timeval timeout, uint32_t destnode)
1283 ret = ctdb_control(ctdb, destnode, 0,
1284 CTDB_CONTROL_RELOAD_NODES_FILE, 0, tdb_null,
1285 NULL, NULL, &res, &timeout, NULL);
1286 if (ret != 0 || res != 0) {
1287 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reloadnodesfile failed\n"));
1296 set vnn map on a node
1298 int ctdb_ctrl_setvnnmap(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1299 TALLOC_CTX *mem_ctx, struct ctdb_vnn_map *vnnmap)
1304 struct ctdb_vnn_map_wire *map;
1307 len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*vnnmap->size;
1308 map = talloc_size(mem_ctx, len);
1309 CTDB_NO_MEMORY(ctdb, map);
1311 map->generation = vnnmap->generation;
1312 map->size = vnnmap->size;
1313 memcpy(map->map, vnnmap->map, sizeof(uint32_t)*map->size);
1316 data.dptr = (uint8_t *)map;
1318 ret = ctdb_control(ctdb, destnode, 0,
1319 CTDB_CONTROL_SETVNNMAP, 0, data,
1320 NULL, NULL, &res, &timeout, NULL);
1321 if (ret != 0 || res != 0) {
1322 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setvnnmap failed\n"));
/*
 * Async send half of "pull database": fills a struct ctdb_control_pulldb
 * allocated under mem_ctx (lmaster is set in the visible lines) and sends
 * it to `destnode` with CTDB_CONTROL_PULL_DB and the given timeout.
 * NOTE(review): the assignment of the database id, the declaration of
 * `indata`, any release of `pull` and the return are not visible in this
 * extract.
 */
1333 async send for pull database
1335 struct ctdb_client_control_state *ctdb_ctrl_pulldb_send(
1336 struct ctdb_context *ctdb, uint32_t destnode, uint32_t dbid,
1337 uint32_t lmaster, TALLOC_CTX *mem_ctx, struct timeval timeout)
1340 struct ctdb_control_pulldb *pull;
1341 struct ctdb_client_control_state *state;
1343 pull = talloc(mem_ctx, struct ctdb_control_pulldb);
1344 CTDB_NO_MEMORY_NULL(ctdb, pull);
1347 pull->lmaster = lmaster;
1349 indata.dsize = sizeof(struct ctdb_control_pulldb);
1350 indata.dptr = (unsigned char *)pull;
1352 state = ctdb_control_send(ctdb, destnode, 0,
1353 CTDB_CONTROL_PULL_DB, 0, indata,
1354 mem_ctx, &timeout, NULL);
/*
 * Async receive half of "pull database": waits for the control, passing
 * `outdata` through to ctdb_control_recv(), and treats a non-zero result
 * or status as failure.
 * NOTE(review): the last parameter line, the local declarations and the
 * returns are not visible in this extract.
 */
1361 async recv for pull database
1363 int ctdb_ctrl_pulldb_recv(
1364 struct ctdb_context *ctdb,
1365 TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state,
1371 ret = ctdb_control_recv(ctdb, state, mem_ctx, outdata, &res, NULL);
1372 if ( (ret != 0) || (res != 0) ){
1373 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_pulldb_recv failed\n"));
/*
 * Synchronous "pull database": the _send half followed immediately by
 * the _recv half; returns the _recv result.
 * NOTE(review): the last parameter line and the tail of the _send call
 * are not visible in this extract.
 */
1381 pull all keys and records for a specific database on a node
1383 int ctdb_ctrl_pulldb(struct ctdb_context *ctdb, uint32_t destnode,
1384 uint32_t dbid, uint32_t lmaster,
1385 TALLOC_CTX *mem_ctx, struct timeval timeout,
1388 struct ctdb_client_control_state *state;
1390 state = ctdb_ctrl_pulldb_send(ctdb, destnode, dbid, lmaster, mem_ctx,
1393 return ctdb_ctrl_pulldb_recv(ctdb, mem_ctx, state, outdata);
1398 change dmaster for all keys in the database to the new value
1400 int ctdb_ctrl_setdmaster(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1401 TALLOC_CTX *mem_ctx, uint32_t dbid, uint32_t dmaster)
1407 indata.dsize = 2*sizeof(uint32_t);
1408 indata.dptr = (unsigned char *)talloc_array(mem_ctx, uint32_t, 2);
1410 ((uint32_t *)(&indata.dptr[0]))[0] = dbid;
1411 ((uint32_t *)(&indata.dptr[0]))[1] = dmaster;
1413 ret = ctdb_control(ctdb, destnode, 0,
1414 CTDB_CONTROL_SET_DMASTER, 0, indata,
1415 NULL, NULL, &res, &timeout, NULL);
1416 if (ret != 0 || res != 0) {
1417 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setdmaster failed\n"));
1425 ping a node, return number of clients connected
1427 int ctdb_ctrl_ping(struct ctdb_context *ctdb, uint32_t destnode)
1432 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_PING, 0,
1433 tdb_null, NULL, NULL, &res, NULL, NULL);
1441 find the real path to a ltdb
1443 int ctdb_ctrl_getdbpath(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1450 data.dptr = (uint8_t *)&dbid;
1451 data.dsize = sizeof(dbid);
1453 ret = ctdb_control(ctdb, destnode, 0,
1454 CTDB_CONTROL_GETDBPATH, 0, data,
1455 mem_ctx, &data, &res, &timeout, NULL);
1456 if (ret != 0 || res != 0) {
1460 (*path) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1461 if ((*path) == NULL) {
1465 talloc_free(data.dptr);
1471 find the name of a db
1473 int ctdb_ctrl_getdbname(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t dbid, TALLOC_CTX *mem_ctx,
1480 data.dptr = (uint8_t *)&dbid;
1481 data.dsize = sizeof(dbid);
1483 ret = ctdb_control(ctdb, destnode, 0,
1484 CTDB_CONTROL_GET_DBNAME, 0, data,
1485 mem_ctx, &data, &res, &timeout, NULL);
1486 if (ret != 0 || res != 0) {
1490 (*name) = talloc_strndup(mem_ctx, (const char *)data.dptr, data.dsize);
1491 if ((*name) == NULL) {
1495 talloc_free(data.dptr);
1503 int ctdb_ctrl_createdb(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
1504 TALLOC_CTX *mem_ctx, const char *name, bool persistent)
1510 data.dptr = discard_const(name);
1511 data.dsize = strlen(name)+1;
1513 ret = ctdb_control(ctdb, destnode, 0,
1514 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1516 mem_ctx, &data, &res, &timeout, NULL);
1518 if (ret != 0 || res != 0) {
1526 get debug level on a node
1528 int ctdb_ctrl_get_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t *level)
1534 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_DEBUG, 0, tdb_null,
1535 ctdb, &data, &res, NULL, NULL);
1536 if (ret != 0 || res != 0) {
1539 if (data.dsize != sizeof(int32_t)) {
1540 DEBUG(DEBUG_ERR,("Bad control reply size in ctdb_get_debuglevel (got %u)\n",
1541 (unsigned)data.dsize));
1544 *level = *(int32_t *)data.dptr;
1545 talloc_free(data.dptr);
1550 set debug level on a node
1552 int ctdb_ctrl_set_debuglevel(struct ctdb_context *ctdb, uint32_t destnode, int32_t level)
1558 data.dptr = (uint8_t *)&level;
1559 data.dsize = sizeof(level);
1561 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_DEBUG, 0, data,
1562 NULL, NULL, &res, NULL, NULL);
1563 if (ret != 0 || res != 0) {
1571 get a list of connected nodes
1573 uint32_t *ctdb_get_connected_nodes(struct ctdb_context *ctdb,
1574 struct timeval timeout,
1575 TALLOC_CTX *mem_ctx,
1576 uint32_t *num_nodes)
1578 struct ctdb_node_map *map=NULL;
1584 ret = ctdb_ctrl_getnodemap(ctdb, timeout, CTDB_CURRENT_NODE, mem_ctx, &map);
1589 nodes = talloc_array(mem_ctx, uint32_t, map->num);
1590 if (nodes == NULL) {
1594 for (i=0;i<map->num;i++) {
1595 if (!(map->nodes[i].flags & NODE_FLAGS_DISCONNECTED)) {
1596 nodes[*num_nodes] = map->nodes[i].pnn;
1608 int ctdb_statistics_reset(struct ctdb_context *ctdb, uint32_t destnode)
1613 ret = ctdb_control(ctdb, destnode, 0,
1614 CTDB_CONTROL_STATISTICS_RESET, 0, tdb_null,
1615 NULL, NULL, &res, NULL, NULL);
1616 if (ret != 0 || res != 0) {
1617 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for reset statistics failed\n"));
/*
  this is the dummy null procedure that all databases support

  Does nothing with the call and reports success.
 */
static int ctdb_null_func(struct ctdb_call_info *call)
{
	(void)call;
	return 0;
}
1632 this is a plain fetch procedure that all databases support
1634 static int ctdb_fetch_func(struct ctdb_call_info *call)
1636 call->reply_data = &call->record_data;
1641 attach to a specific database - client call
1643 struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name, bool persistent, uint32_t tdb_flags)
1645 struct ctdb_db_context *ctdb_db;
1650 ctdb_db = ctdb_db_handle(ctdb, name);
1655 ctdb_db = talloc_zero(ctdb, struct ctdb_db_context);
1656 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db);
1658 ctdb_db->ctdb = ctdb;
1659 ctdb_db->db_name = talloc_strdup(ctdb_db, name);
1660 CTDB_NO_MEMORY_NULL(ctdb, ctdb_db->db_name);
1662 data.dptr = discard_const(name);
1663 data.dsize = strlen(name)+1;
1665 /* tell ctdb daemon to attach */
1666 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, tdb_flags,
1667 persistent?CTDB_CONTROL_DB_ATTACH_PERSISTENT:CTDB_CONTROL_DB_ATTACH,
1668 0, data, ctdb_db, &data, &res, NULL, NULL);
1669 if (ret != 0 || res != 0 || data.dsize != sizeof(uint32_t)) {
1670 DEBUG(DEBUG_ERR,("Failed to attach to database '%s'\n", name));
1671 talloc_free(ctdb_db);
1675 ctdb_db->db_id = *(uint32_t *)data.dptr;
1676 talloc_free(data.dptr);
1678 ret = ctdb_ctrl_getdbpath(ctdb, timeval_current_ofs(2, 0), CTDB_CURRENT_NODE, ctdb_db->db_id, ctdb_db, &ctdb_db->db_path);
1680 DEBUG(DEBUG_ERR,("Failed to get dbpath for database '%s'\n", name));
1681 talloc_free(ctdb_db);
1685 tdb_flags = persistent?TDB_DEFAULT:TDB_NOSYNC;
1686 if (!ctdb->do_setsched) {
1687 tdb_flags |= TDB_NOMMAP;
1690 ctdb_db->ltdb = tdb_wrap_open(ctdb, ctdb_db->db_path, 0, tdb_flags, O_RDWR, 0);
1691 if (ctdb_db->ltdb == NULL) {
1692 ctdb_set_error(ctdb, "Failed to open tdb '%s'\n", ctdb_db->db_path);
1693 talloc_free(ctdb_db);
1697 ctdb_db->persistent = persistent;
1699 DLIST_ADD(ctdb->db_list, ctdb_db);
1701 /* add well known functions */
1702 ctdb_set_call(ctdb_db, ctdb_null_func, CTDB_NULL_FUNC);
1703 ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC);
1710 setup a call for a database
1712 int ctdb_set_call(struct ctdb_db_context *ctdb_db, ctdb_fn_t fn, uint32_t id)
1714 struct ctdb_registered_call *call;
1719 struct ctdb_control_set_call c;
1722 /* this is no longer valid with the separate daemon architecture */
1723 c.db_id = ctdb_db->db_id;
1727 data.dptr = (uint8_t *)&c;
1728 data.dsize = sizeof(c);
1730 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_SET_CALL, 0,
1731 data, NULL, NULL, &status, NULL, NULL);
1732 if (ret != 0 || status != 0) {
1733 DEBUG(DEBUG_ERR,("ctdb_set_call failed for call %u\n", id));
1738 /* also register locally */
1739 call = talloc(ctdb_db, struct ctdb_registered_call);
1743 DLIST_ADD(ctdb_db->calls, call);
1748 struct traverse_state {
1751 ctdb_traverse_func fn;
1756 called on each key during a ctdb_traverse
1758 static void traverse_handler(struct ctdb_context *ctdb, uint64_t srvid, TDB_DATA data, void *p)
1760 struct traverse_state *state = (struct traverse_state *)p;
1761 struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
1764 if (data.dsize < sizeof(uint32_t) ||
1765 d->length != data.dsize) {
1766 DEBUG(DEBUG_ERR,("Bad data size %u in traverse_handler\n", (unsigned)data.dsize));
1771 key.dsize = d->keylen;
1772 key.dptr = &d->data[0];
1773 data.dsize = d->datalen;
1774 data.dptr = &d->data[d->keylen];
1776 if (key.dsize == 0 && data.dsize == 0) {
1777 /* end of traverse */
1782 if (data.dsize == sizeof(struct ctdb_ltdb_header)) {
1783 /* empty records are deleted records in ctdb */
1787 if (state->fn(ctdb, key, data, state->private_data) != 0) {
1796 start a cluster wide traverse, calling the supplied fn on each record
1797 return the number of records traversed, or -1 on error
1799 int ctdb_traverse(struct ctdb_db_context *ctdb_db, ctdb_traverse_func fn, void *private_data)
1802 struct ctdb_traverse_start t;
1805 uint64_t srvid = (getpid() | 0xFLL<<60);
1806 struct traverse_state state;
1810 state.private_data = private_data;
1813 ret = ctdb_set_message_handler(ctdb_db->ctdb, srvid, traverse_handler, &state);
1815 DEBUG(DEBUG_ERR,("Failed to setup traverse handler\n"));
1819 t.db_id = ctdb_db->db_id;
1823 data.dptr = (uint8_t *)&t;
1824 data.dsize = sizeof(t);
1826 ret = ctdb_control(ctdb_db->ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_TRAVERSE_START, 0,
1827 data, NULL, NULL, &status, NULL, NULL);
1828 if (ret != 0 || status != 0) {
1829 DEBUG(DEBUG_ERR,("ctdb_traverse_all failed\n"));
1830 ctdb_remove_message_handler(ctdb_db->ctdb, srvid, &state);
1834 while (!state.done) {
1835 event_loop_once(ctdb_db->ctdb->ev);
1838 ret = ctdb_remove_message_handler(ctdb_db->ctdb, srvid, &state);
1840 DEBUG(DEBUG_ERR,("Failed to remove ctdb_traverse handler\n"));
/* true for byte values 32..127; the argument is parenthesised so that
   expression arguments expand correctly (it is still evaluated twice) */
#define ISASCII(x) (((x)>31)&&((x)<128))
1849 called on each key during a catdb
1851 static int dumpdb_fn(struct ctdb_context *ctdb, TDB_DATA key, TDB_DATA data, void *p)
1854 FILE *f = (FILE *)p;
1855 struct ctdb_ltdb_header *h = (struct ctdb_ltdb_header *)data.dptr;
1857 fprintf(f, "dmaster: %u\n", h->dmaster);
1858 fprintf(f, "rsn: %llu\n", (unsigned long long)h->rsn);
1860 fprintf(f, "key(%u) = \"", (unsigned)key.dsize);
1861 for (i=0;i<key.dsize;i++) {
1862 if (ISASCII(key.dptr[i])) {
1863 fprintf(f, "%c", key.dptr[i]);
1865 fprintf(f, "\\%02X", key.dptr[i]);
1870 fprintf(f, "data(%u) = \"", (unsigned)data.dsize);
1871 for (i=sizeof(*h);i<data.dsize;i++) {
1872 if (ISASCII(data.dptr[i])) {
1873 fprintf(f, "%c", data.dptr[i]);
1875 fprintf(f, "\\%02X", data.dptr[i]);
1884 convenience function to list all keys to stdout
1886 int ctdb_dump_db(struct ctdb_db_context *ctdb_db, FILE *f)
1888 return ctdb_traverse(ctdb_db, dumpdb_fn, f);
1892 get the pid of a ctdb daemon
1894 int ctdb_ctrl_getpid(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *pid)
1899 ret = ctdb_control(ctdb, destnode, 0,
1900 CTDB_CONTROL_GET_PID, 0, tdb_null,
1901 NULL, NULL, &res, &timeout, NULL);
1903 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpid failed\n"));
1914 async freeze send control
1916 struct ctdb_client_control_state *
1917 ctdb_ctrl_freeze_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, uint32_t priority)
1919 return ctdb_control_send(ctdb, destnode, priority,
1920 CTDB_CONTROL_FREEZE, 0, tdb_null,
1921 mem_ctx, &timeout, NULL);
1925 async freeze recv control
1927 int ctdb_ctrl_freeze_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state)
1932 ret = ctdb_control_recv(ctdb, state, mem_ctx, NULL, &res, NULL);
1933 if ( (ret != 0) || (res != 0) ){
1934 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_freeze_recv failed\n"));
1942 freeze databases of a certain priority
1944 int ctdb_ctrl_freeze_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
1946 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
1947 struct ctdb_client_control_state *state;
1950 state = ctdb_ctrl_freeze_send(ctdb, tmp_ctx, timeout, destnode, priority);
1951 ret = ctdb_ctrl_freeze_recv(ctdb, tmp_ctx, state);
1952 talloc_free(tmp_ctx);
1957 /* Freeze all databases */
1958 int ctdb_ctrl_freeze(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1962 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
1963 if (ctdb_ctrl_freeze_priority(ctdb, timeout, destnode, i) != 0) {
1971 thaw databases of a certain priority
1973 int ctdb_ctrl_thaw_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t priority)
1978 ret = ctdb_control(ctdb, destnode, priority,
1979 CTDB_CONTROL_THAW, 0, tdb_null,
1980 NULL, NULL, &res, &timeout, NULL);
1981 if (ret != 0 || res != 0) {
1982 DEBUG(DEBUG_ERR,(__location__ " ctdb_control thaw failed\n"));
1989 /* thaw all databases */
1990 int ctdb_ctrl_thaw(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
1992 return ctdb_ctrl_thaw_priority(ctdb, timeout, destnode, 0);
1996 get pnn of a node, or -1
1998 int ctdb_ctrl_getpnn(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2003 ret = ctdb_control(ctdb, destnode, 0,
2004 CTDB_CONTROL_GET_PNN, 0, tdb_null,
2005 NULL, NULL, &res, &timeout, NULL);
2007 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpnn failed\n"));
2015 get the monitoring mode of a remote node
2017 int ctdb_ctrl_getmonmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *monmode)
2022 ret = ctdb_control(ctdb, destnode, 0,
2023 CTDB_CONTROL_GET_MONMODE, 0, tdb_null,
2024 NULL, NULL, &res, &timeout, NULL);
2026 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getmonmode failed\n"));
2037 set the monitoring mode of a remote node to active
2039 int ctdb_ctrl_enable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2044 ret = ctdb_control(ctdb, destnode, 0,
2045 CTDB_CONTROL_ENABLE_MONITOR, 0, tdb_null,
2046 NULL, NULL,NULL, &timeout, NULL);
2048 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enable_monitor failed\n"));
2058 set the monitoring mode of a remote node to disable
2060 int ctdb_ctrl_disable_monmode(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2065 ret = ctdb_control(ctdb, destnode, 0,
2066 CTDB_CONTROL_DISABLE_MONITOR, 0, tdb_null,
2067 NULL, NULL, NULL, &timeout, NULL);
2069 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disable_monitor failed\n"));
2081 sent to a node to make it take over an ip address
2083 int ctdb_ctrl_takeover_ip(struct ctdb_context *ctdb, struct timeval timeout,
2084 uint32_t destnode, struct ctdb_public_ip *ip)
2087 struct ctdb_public_ipv4 ipv4;
2091 if (ip->addr.sa.sa_family == AF_INET) {
2093 ipv4.sin = ip->addr.ip;
2095 data.dsize = sizeof(ipv4);
2096 data.dptr = (uint8_t *)&ipv4;
2098 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IPv4, 0, data, NULL,
2099 NULL, &res, &timeout, NULL);
2101 data.dsize = sizeof(*ip);
2102 data.dptr = (uint8_t *)ip;
2104 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_TAKEOVER_IP, 0, data, NULL,
2105 NULL, &res, &timeout, NULL);
2108 if (ret != 0 || res != 0) {
2109 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for takeover_ip failed\n"));
2118 sent to a node to make it release an ip address
2120 int ctdb_ctrl_release_ip(struct ctdb_context *ctdb, struct timeval timeout,
2121 uint32_t destnode, struct ctdb_public_ip *ip)
2124 struct ctdb_public_ipv4 ipv4;
2128 if (ip->addr.sa.sa_family == AF_INET) {
2130 ipv4.sin = ip->addr.ip;
2132 data.dsize = sizeof(ipv4);
2133 data.dptr = (uint8_t *)&ipv4;
2135 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IPv4, 0, data, NULL,
2136 NULL, &res, &timeout, NULL);
2138 data.dsize = sizeof(*ip);
2139 data.dptr = (uint8_t *)ip;
2141 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_RELEASE_IP, 0, data, NULL,
2142 NULL, &res, &timeout, NULL);
2145 if (ret != 0 || res != 0) {
2146 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for release_ip failed\n"));
2157 int ctdb_ctrl_get_tunable(struct ctdb_context *ctdb,
2158 struct timeval timeout,
2160 const char *name, uint32_t *value)
2162 struct ctdb_control_get_tunable *t;
2163 TDB_DATA data, outdata;
2167 data.dsize = offsetof(struct ctdb_control_get_tunable, name) + strlen(name) + 1;
2168 data.dptr = talloc_size(ctdb, data.dsize);
2169 CTDB_NO_MEMORY(ctdb, data.dptr);
2171 t = (struct ctdb_control_get_tunable *)data.dptr;
2172 t->length = strlen(name)+1;
2173 memcpy(t->name, name, t->length);
2175 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_TUNABLE, 0, data, ctdb,
2176 &outdata, &res, &timeout, NULL);
2177 talloc_free(data.dptr);
2178 if (ret != 0 || res != 0) {
2179 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_tunable failed\n"));
2183 if (outdata.dsize != sizeof(uint32_t)) {
2184 DEBUG(DEBUG_ERR,("Invalid return data in get_tunable\n"));
2185 talloc_free(outdata.dptr);
2189 *value = *(uint32_t *)outdata.dptr;
2190 talloc_free(outdata.dptr);
2198 int ctdb_ctrl_set_tunable(struct ctdb_context *ctdb,
2199 struct timeval timeout,
2201 const char *name, uint32_t value)
2203 struct ctdb_control_set_tunable *t;
2208 data.dsize = offsetof(struct ctdb_control_set_tunable, name) + strlen(name) + 1;
2209 data.dptr = talloc_size(ctdb, data.dsize);
2210 CTDB_NO_MEMORY(ctdb, data.dptr);
2212 t = (struct ctdb_control_set_tunable *)data.dptr;
2213 t->length = strlen(name)+1;
2214 memcpy(t->name, name, t->length);
2217 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SET_TUNABLE, 0, data, NULL,
2218 NULL, &res, &timeout, NULL);
2219 talloc_free(data.dptr);
2220 if (ret != 0 || res != 0) {
2221 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_tunable failed\n"));
2231 int ctdb_ctrl_list_tunables(struct ctdb_context *ctdb,
2232 struct timeval timeout,
2234 TALLOC_CTX *mem_ctx,
2235 const char ***list, uint32_t *count)
2240 struct ctdb_control_list_tunable *t;
2243 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_LIST_TUNABLES, 0, tdb_null,
2244 mem_ctx, &outdata, &res, &timeout, NULL);
2245 if (ret != 0 || res != 0) {
2246 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for list_tunables failed\n"));
2250 t = (struct ctdb_control_list_tunable *)outdata.dptr;
2251 if (outdata.dsize < offsetof(struct ctdb_control_list_tunable, data) ||
2252 t->length > outdata.dsize-offsetof(struct ctdb_control_list_tunable, data)) {
2253 DEBUG(DEBUG_ERR,("Invalid data in list_tunables reply\n"));
2254 talloc_free(outdata.dptr);
2258 p = talloc_strndup(mem_ctx, (char *)t->data, t->length);
2259 CTDB_NO_MEMORY(ctdb, p);
2261 talloc_free(outdata.dptr);
2266 for (s=strtok_r(p, ":", &ptr); s; s=strtok_r(NULL, ":", &ptr)) {
2267 (*list) = talloc_realloc(mem_ctx, *list, const char *, 1+(*count));
2268 CTDB_NO_MEMORY(ctdb, *list);
2269 (*list)[*count] = talloc_strdup(*list, s);
2270 CTDB_NO_MEMORY(ctdb, (*list)[*count]);
2280 int ctdb_ctrl_get_public_ips(struct ctdb_context *ctdb,
2281 struct timeval timeout, uint32_t destnode,
2282 TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
2288 ret = ctdb_control(ctdb, destnode, 0,
2289 CTDB_CONTROL_GET_PUBLIC_IPS, 0, tdb_null,
2290 mem_ctx, &outdata, &res, &timeout, NULL);
2291 if (ret == 0 && res == -1) {
2292 DEBUG(DEBUG_ERR,(__location__ " ctdb_control to get public ips failed, falling back to ipv4-only version\n"));
2293 return ctdb_ctrl_get_public_ipsv4(ctdb, timeout, destnode, mem_ctx, ips);
2295 if (ret != 0 || res != 0) {
2296 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed ret:%d res:%d\n", ret, res));
2300 *ips = (struct ctdb_all_public_ips *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
2301 talloc_free(outdata.dptr);
2306 int ctdb_ctrl_get_public_ipsv4(struct ctdb_context *ctdb,
2307 struct timeval timeout, uint32_t destnode,
2308 TALLOC_CTX *mem_ctx, struct ctdb_all_public_ips **ips)
2313 struct ctdb_all_public_ipsv4 *ipsv4;
2315 ret = ctdb_control(ctdb, destnode, 0,
2316 CTDB_CONTROL_GET_PUBLIC_IPSv4, 0, tdb_null,
2317 mem_ctx, &outdata, &res, &timeout, NULL);
2318 if (ret != 0 || res != 0) {
2319 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getpublicips failed\n"));
2323 ipsv4 = (struct ctdb_all_public_ipsv4 *)outdata.dptr;
2324 len = offsetof(struct ctdb_all_public_ips, ips) +
2325 ipsv4->num*sizeof(struct ctdb_public_ip);
2326 *ips = talloc_zero_size(mem_ctx, len);
2327 CTDB_NO_MEMORY(ctdb, *ips);
2328 (*ips)->num = ipsv4->num;
2329 for (i=0; i<ipsv4->num; i++) {
2330 (*ips)->ips[i].pnn = ipsv4->ips[i].pnn;
2331 (*ips)->ips[i].addr.ip = ipsv4->ips[i].sin;
2334 talloc_free(outdata.dptr);
2340 set/clear the permanent disabled bit on a remote node
2342 int ctdb_ctrl_modflags(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode,
2343 uint32_t set, uint32_t clear)
2347 struct ctdb_node_map *nodemap=NULL;
2348 struct ctdb_node_flag_change c;
2349 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2354 /* find the recovery master */
2355 ret = ctdb_ctrl_getrecmaster(ctdb, tmp_ctx, timeout, CTDB_CURRENT_NODE, &recmaster);
2357 DEBUG(DEBUG_ERR, (__location__ " Unable to get recmaster from local node\n"));
2358 talloc_free(tmp_ctx);
2363 /* read the node flags from the recmaster */
2364 ret = ctdb_ctrl_getnodemap(ctdb, timeout, recmaster, tmp_ctx, &nodemap);
2366 DEBUG(DEBUG_ERR, (__location__ " Unable to get nodemap from node %u\n", destnode));
2367 talloc_free(tmp_ctx);
2370 if (destnode >= nodemap->num) {
2371 DEBUG(DEBUG_ERR,(__location__ " Nodemap from recmaster does not contain node %d\n", destnode));
2372 talloc_free(tmp_ctx);
2377 c.old_flags = nodemap->nodes[destnode].flags;
2378 c.new_flags = c.old_flags;
2380 c.new_flags &= ~clear;
2382 data.dsize = sizeof(c);
2383 data.dptr = (unsigned char *)&c;
2385 /* send the flags update to all connected nodes */
2386 nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true);
2388 if (ctdb_client_async_control(ctdb, CTDB_CONTROL_MODIFY_FLAGS,
2390 timeout, false, data,
2393 DEBUG(DEBUG_ERR, (__location__ " Unable to update nodeflags on remote nodes\n"));
2395 talloc_free(tmp_ctx);
2399 talloc_free(tmp_ctx);
2407 int ctdb_ctrl_get_all_tunables(struct ctdb_context *ctdb,
2408 struct timeval timeout,
2410 struct ctdb_tunable *tunables)
2416 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_GET_ALL_TUNABLES, 0, tdb_null, ctdb,
2417 &outdata, &res, &timeout, NULL);
2418 if (ret != 0 || res != 0) {
2419 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get all tunables failed\n"));
2423 if (outdata.dsize != sizeof(*tunables)) {
2424 DEBUG(DEBUG_ERR,(__location__ " bad data size %u in ctdb_ctrl_get_all_tunables should be %u\n",
2425 (unsigned)outdata.dsize, (unsigned)sizeof(*tunables)));
2429 *tunables = *(struct ctdb_tunable *)outdata.dptr;
2430 talloc_free(outdata.dptr);
2435 add a public address to a node
2437 int ctdb_ctrl_add_public_ip(struct ctdb_context *ctdb,
2438 struct timeval timeout,
2440 struct ctdb_control_ip_iface *pub)
2446 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
2447 data.dptr = (unsigned char *)pub;
2449 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_ADD_PUBLIC_IP, 0, data, NULL,
2450 NULL, &res, &timeout, NULL);
2451 if (ret != 0 || res != 0) {
2452 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for add_public_ip failed\n"));
2460 delete a public address from a node
2462 int ctdb_ctrl_del_public_ip(struct ctdb_context *ctdb,
2463 struct timeval timeout,
2465 struct ctdb_control_ip_iface *pub)
2471 data.dsize = offsetof(struct ctdb_control_ip_iface, iface) + pub->len;
2472 data.dptr = (unsigned char *)pub;
2474 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_DEL_PUBLIC_IP, 0, data, NULL,
2475 NULL, &res, &timeout, NULL);
2476 if (ret != 0 || res != 0) {
2477 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for del_public_ip failed\n"));
2485 kill a tcp connection
2487 int ctdb_ctrl_killtcp(struct ctdb_context *ctdb,
2488 struct timeval timeout,
2490 struct ctdb_control_killtcp *killtcp)
2496 data.dsize = sizeof(struct ctdb_control_killtcp);
2497 data.dptr = (unsigned char *)killtcp;
2499 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_KILL_TCP, 0, data, NULL,
2500 NULL, &res, &timeout, NULL);
2501 if (ret != 0 || res != 0) {
2502 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for killtcp failed\n"));
2512 int ctdb_ctrl_gratious_arp(struct ctdb_context *ctdb,
2513 struct timeval timeout,
2515 ctdb_sock_addr *addr,
2521 struct ctdb_control_gratious_arp *gratious_arp;
2522 TALLOC_CTX *tmp_ctx = talloc_new(ctdb);
2525 len = strlen(ifname)+1;
2526 gratious_arp = talloc_size(tmp_ctx,
2527 offsetof(struct ctdb_control_gratious_arp, iface) + len);
2528 CTDB_NO_MEMORY(ctdb, gratious_arp);
2530 gratious_arp->addr = *addr;
2531 gratious_arp->len = len;
2532 memcpy(&gratious_arp->iface[0], ifname, len);
2535 data.dsize = offsetof(struct ctdb_control_gratious_arp, iface) + len;
2536 data.dptr = (unsigned char *)gratious_arp;
2538 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_SEND_GRATIOUS_ARP, 0, data, NULL,
2539 NULL, &res, &timeout, NULL);
2540 if (ret != 0 || res != 0) {
2541 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for gratious_arp failed\n"));
2542 talloc_free(tmp_ctx);
2546 talloc_free(tmp_ctx);
2551 get a list of all tcp tickles that a node knows about for a particular vnn
2553 int ctdb_ctrl_get_tcp_tickles(struct ctdb_context *ctdb,
2554 struct timeval timeout, uint32_t destnode,
2555 TALLOC_CTX *mem_ctx,
2556 ctdb_sock_addr *addr,
2557 struct ctdb_control_tcp_tickle_list **list)
2560 TDB_DATA data, outdata;
2563 data.dptr = (uint8_t*)addr;
2564 data.dsize = sizeof(ctdb_sock_addr);
2566 ret = ctdb_control(ctdb, destnode, 0,
2567 CTDB_CONTROL_GET_TCP_TICKLE_LIST, 0, data,
2568 mem_ctx, &outdata, &status, NULL, NULL);
2569 if (ret != 0 || status != 0) {
2570 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get tcp tickles failed\n"));
2574 *list = (struct ctdb_control_tcp_tickle_list *)outdata.dptr;
2580 register a server id
2582 int ctdb_ctrl_register_server_id(struct ctdb_context *ctdb,
2583 struct timeval timeout,
2584 struct ctdb_server_id *id)
2590 data.dsize = sizeof(struct ctdb_server_id);
2591 data.dptr = (unsigned char *)id;
2593 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
2594 CTDB_CONTROL_REGISTER_SERVER_ID,
2596 NULL, &res, &timeout, NULL);
2597 if (ret != 0 || res != 0) {
2598 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for register server id failed\n"));
2606 unregister a server id
2608 int ctdb_ctrl_unregister_server_id(struct ctdb_context *ctdb,
2609 struct timeval timeout,
2610 struct ctdb_server_id *id)
2616 data.dsize = sizeof(struct ctdb_server_id);
2617 data.dptr = (unsigned char *)id;
2619 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0,
2620 CTDB_CONTROL_UNREGISTER_SERVER_ID,
2622 NULL, &res, &timeout, NULL);
2623 if (ret != 0 || res != 0) {
2624 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for unregister server id failed\n"));
2633 check if a server id exists
2635 if a server id does exist, return *status == 1, otherwise *status == 0
2637 int ctdb_ctrl_check_server_id(struct ctdb_context *ctdb,
2638 struct timeval timeout,
2640 struct ctdb_server_id *id,
2647 data.dsize = sizeof(struct ctdb_server_id);
2648 data.dptr = (unsigned char *)id;
2650 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CHECK_SERVER_ID,
2652 NULL, &res, &timeout, NULL);
2654 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for check server id failed\n"));
2668 get the list of server ids that are registered on a node
2670 int ctdb_ctrl_get_server_id_list(struct ctdb_context *ctdb,
2671 TALLOC_CTX *mem_ctx,
2672 struct timeval timeout, uint32_t destnode,
2673 struct ctdb_server_id_list **svid_list)
2679 ret = ctdb_control(ctdb, destnode, 0,
2680 CTDB_CONTROL_GET_SERVER_ID_LIST, 0, tdb_null,
2681 mem_ctx, &outdata, &res, &timeout, NULL);
2682 if (ret != 0 || res != 0) {
2683 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for get_server_id_list failed\n"));
2687 *svid_list = (struct ctdb_server_id_list *)talloc_steal(mem_ctx, outdata.dptr);
2693 initialise the ctdb daemon for client applications
2695 NOTE: In current code the daemon does not fork. This is for testing purposes only
2696 and to simplify the code.
2698 struct ctdb_context *ctdb_init(struct event_context *ev)
2701 struct ctdb_context *ctdb;
2703 ctdb = talloc_zero(ev, struct ctdb_context);
2705 DEBUG(DEBUG_ERR,(__location__ " talloc_zero failed.\n"));
2709 ctdb->idr = idr_init(ctdb);
2710 CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
2712 ret = ctdb_set_socketname(ctdb, CTDB_PATH);
2714 DEBUG(DEBUG_ERR,(__location__ " ctdb_set_socketname failed.\n"));
2726 void ctdb_set_flags(struct ctdb_context *ctdb, unsigned flags)
2728 ctdb->flags |= flags;
2732 setup the local socket name
2734 int ctdb_set_socketname(struct ctdb_context *ctdb, const char *socketname)
2736 ctdb->daemon.name = talloc_strdup(ctdb, socketname);
2737 CTDB_NO_MEMORY(ctdb, ctdb->daemon.name);
2743 return the pnn of this node
2745 uint32_t ctdb_get_pnn(struct ctdb_context *ctdb)
2752 get the uptime of a remote node
2754 struct ctdb_client_control_state *
2755 ctdb_ctrl_uptime_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
2757 return ctdb_control_send(ctdb, destnode, 0,
2758 CTDB_CONTROL_UPTIME, 0, tdb_null,
2759 mem_ctx, &timeout, NULL);
2762 int ctdb_ctrl_uptime_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, struct ctdb_uptime **uptime)
2768 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
2769 if (ret != 0 || res != 0) {
2770 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_uptime_recv failed\n"));
2774 *uptime = (struct ctdb_uptime *)talloc_steal(mem_ctx, outdata.dptr);
2779 int ctdb_ctrl_uptime(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode, struct ctdb_uptime **uptime)
2781 struct ctdb_client_control_state *state;
2783 state = ctdb_ctrl_uptime_send(ctdb, mem_ctx, timeout, destnode);
2784 return ctdb_ctrl_uptime_recv(ctdb, mem_ctx, state, uptime);
2788 send a control to execute the "recovered" event script on a node
2790 int ctdb_ctrl_end_recovery(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
2795 ret = ctdb_control(ctdb, destnode, 0,
2796 CTDB_CONTROL_END_RECOVERY, 0, tdb_null,
2797 NULL, NULL, &status, &timeout, NULL);
2798 if (ret != 0 || status != 0) {
2799 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for end_recovery failed\n"));
2807 callback for the async helpers used when sending the same control
2808 to multiple nodes in parallell.
2810 static void async_callback(struct ctdb_client_control_state *state)
2812 struct client_async_data *data = talloc_get_type(state->async.private_data, struct client_async_data);
2813 struct ctdb_context *ctdb = talloc_get_type(state->ctdb, struct ctdb_context);
2817 uint32_t destnode = state->c->hdr.destnode;
2819 /* one more node has responded with recmode data */
2822 /* if we failed to push the db, then return an error and let
2823 the main loop try again.
2825 if (state->state != CTDB_CONTROL_DONE) {
2826 if ( !data->dont_log_errors) {
2827 DEBUG(DEBUG_ERR,("Async operation failed with state %d, opcode:%u\n", state->state, data->opcode));
2830 if (data->fail_callback) {
2831 data->fail_callback(ctdb, destnode, res, outdata,
2832 data->callback_data);
2837 state->async.fn = NULL;
2839 ret = ctdb_control_recv(ctdb, state, data, &outdata, &res, NULL);
2840 if ((ret != 0) || (res != 0)) {
2841 if ( !data->dont_log_errors) {
2842 DEBUG(DEBUG_ERR,("Async operation failed with ret=%d res=%d opcode=%u\n", ret, (int)res, data->opcode));
2845 if (data->fail_callback) {
2846 data->fail_callback(ctdb, destnode, res, outdata,
2847 data->callback_data);
2850 if ((ret == 0) && (data->callback != NULL)) {
2851 data->callback(ctdb, destnode, res, outdata,
2852 data->callback_data);
2857 void ctdb_client_async_add(struct client_async_data *data, struct ctdb_client_control_state *state)
2859 /* set up the callback functions */
2860 state->async.fn = async_callback;
2861 state->async.private_data = data;
2863 /* one more control to wait for to complete */
2868 /* wait for up to the maximum number of seconds allowed
2869 or until all nodes we expect a response from has replied
2871 int ctdb_client_async_wait(struct ctdb_context *ctdb, struct client_async_data *data)
2873 while (data->count > 0) {
2874 event_loop_once(ctdb->ev);
2876 if (data->fail_count != 0) {
2877 if (!data->dont_log_errors) {
2878 DEBUG(DEBUG_ERR,("Async wait failed - fail_count=%u\n",
2888 perform a simple control on the listed nodes
2889 The control cannot return data
2891 int ctdb_client_async_control(struct ctdb_context *ctdb,
2892 enum ctdb_controls opcode,
2895 struct timeval timeout,
2896 bool dont_log_errors,
2898 client_async_callback client_callback,
2899 client_async_callback fail_callback,
2900 void *callback_data)
2902 struct client_async_data *async_data;
2903 struct ctdb_client_control_state *state;
2906 async_data = talloc_zero(ctdb, struct client_async_data);
2907 CTDB_NO_MEMORY_FATAL(ctdb, async_data);
2908 async_data->dont_log_errors = dont_log_errors;
2909 async_data->callback = client_callback;
2910 async_data->fail_callback = fail_callback;
2911 async_data->callback_data = callback_data;
2912 async_data->opcode = opcode;
2914 num_nodes = talloc_get_size(nodes) / sizeof(uint32_t);
2916 /* loop over all nodes and send an async control to each of them */
2917 for (j=0; j<num_nodes; j++) {
2918 uint32_t pnn = nodes[j];
2920 state = ctdb_control_send(ctdb, pnn, srvid, opcode,
2921 0, data, async_data, &timeout, NULL);
2922 if (state == NULL) {
2923 DEBUG(DEBUG_ERR,(__location__ " Failed to call async control %u\n", (unsigned)opcode));
2924 talloc_free(async_data);
2928 ctdb_client_async_add(async_data, state);
2931 if (ctdb_client_async_wait(ctdb, async_data) != 0) {
2932 talloc_free(async_data);
2936 talloc_free(async_data);
2940 uint32_t *list_of_vnnmap_nodes(struct ctdb_context *ctdb,
2941 struct ctdb_vnn_map *vnn_map,
2942 TALLOC_CTX *mem_ctx,
2945 int i, j, num_nodes;
2948 for (i=num_nodes=0;i<vnn_map->size;i++) {
2949 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
2955 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
2956 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
2958 for (i=j=0;i<vnn_map->size;i++) {
2959 if (vnn_map->map[i] == ctdb->pnn && !include_self) {
2962 nodes[j++] = vnn_map->map[i];
2968 uint32_t *list_of_active_nodes(struct ctdb_context *ctdb,
2969 struct ctdb_node_map *node_map,
2970 TALLOC_CTX *mem_ctx,
2973 int i, j, num_nodes;
2976 for (i=num_nodes=0;i<node_map->num;i++) {
2977 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
2980 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
2986 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
2987 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
2989 for (i=j=0;i<node_map->num;i++) {
2990 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
2993 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
2996 nodes[j++] = node_map->nodes[i].pnn;
3002 uint32_t *list_of_active_nodes_except_pnn(struct ctdb_context *ctdb,
3003 struct ctdb_node_map *node_map,
3004 TALLOC_CTX *mem_ctx,
3007 int i, j, num_nodes;
3010 for (i=num_nodes=0;i<node_map->num;i++) {
3011 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3014 if (node_map->nodes[i].pnn == pnn) {
3020 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3021 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3023 for (i=j=0;i<node_map->num;i++) {
3024 if (node_map->nodes[i].flags & NODE_FLAGS_INACTIVE) {
3027 if (node_map->nodes[i].pnn == pnn) {
3030 nodes[j++] = node_map->nodes[i].pnn;
3036 uint32_t *list_of_connected_nodes(struct ctdb_context *ctdb,
3037 struct ctdb_node_map *node_map,
3038 TALLOC_CTX *mem_ctx,
3041 int i, j, num_nodes;
3044 for (i=num_nodes=0;i<node_map->num;i++) {
3045 if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
3048 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3054 nodes = talloc_array(mem_ctx, uint32_t, num_nodes);
3055 CTDB_NO_MEMORY_FATAL(ctdb, nodes);
3057 for (i=j=0;i<node_map->num;i++) {
3058 if (node_map->nodes[i].flags & NODE_FLAGS_DISCONNECTED) {
3061 if (node_map->nodes[i].pnn == ctdb->pnn && !include_self) {
3064 nodes[j++] = node_map->nodes[i].pnn;
3071 this is used to test if a pnn lock exists and if it exists will return
3072 the number of connections that pnn has reported or -1 if that recovery
3073 daemon is not running.
3076 ctdb_read_pnn_lock(int fd, int32_t pnn)
3081 lock.l_type = F_WRLCK;
3082 lock.l_whence = SEEK_SET;
3087 if (fcntl(fd, F_GETLK, &lock) != 0) {
3088 DEBUG(DEBUG_ERR, (__location__ " F_GETLK failed with %s\n", strerror(errno)));
3092 if (lock.l_type == F_UNLCK) {
3096 if (pread(fd, &c, 1, pnn) == -1) {
3097 DEBUG(DEBUG_CRIT,(__location__ " failed read pnn count - %s\n", strerror(errno)));
3105 get capabilities of a remote node
3107 struct ctdb_client_control_state *
3108 ctdb_ctrl_getcapabilities_send(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct timeval timeout, uint32_t destnode)
3110 return ctdb_control_send(ctdb, destnode, 0,
3111 CTDB_CONTROL_GET_CAPABILITIES, 0, tdb_null,
3112 mem_ctx, &timeout, NULL);
3115 int ctdb_ctrl_getcapabilities_recv(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, struct ctdb_client_control_state *state, uint32_t *capabilities)
3121 ret = ctdb_control_recv(ctdb, state, mem_ctx, &outdata, &res, NULL);
3122 if ( (ret != 0) || (res != 0) ) {
3123 DEBUG(DEBUG_ERR,(__location__ " ctdb_ctrl_getcapabilities_recv failed\n"));
3128 *capabilities = *((uint32_t *)outdata.dptr);
3134 int ctdb_ctrl_getcapabilities(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t *capabilities)
3136 struct ctdb_client_control_state *state;
3137 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
3140 state = ctdb_ctrl_getcapabilities_send(ctdb, tmp_ctx, timeout, destnode);
3141 ret = ctdb_ctrl_getcapabilities_recv(ctdb, tmp_ctx, state, capabilities);
3142 talloc_free(tmp_ctx);
3147 * check whether a transaction is active on a given db on a given node
3149 static int32_t ctdb_ctrl_transaction_active(struct ctdb_context *ctdb,
3157 indata.dptr = (uint8_t *)&db_id;
3158 indata.dsize = sizeof(db_id);
3160 ret = ctdb_control(ctdb, destnode, 0,
3161 CTDB_CONTROL_TRANS2_ACTIVE,
3162 0, indata, NULL, NULL, &status,
3166 DEBUG(DEBUG_ERR, (__location__ " ctdb control for transaction_active failed\n"));
/* client side state of one transaction on a persistent database */
struct ctdb_transaction_handle {
	/* database the transaction runs on */
	struct ctdb_db_context *ctdb_db;
	/* set while ctdb_replay_transaction() re-applies m_all; fetches
	   and stores then no longer append to m_all */
	bool in_replay;
	/*
	 * we store the reads and writes done under a transaction:
	 * - one list stores both reads and writes (m_all),
	 * - the other just writes (m_write)
	 */
	struct ctdb_marshall_buffer *m_all;
	struct ctdb_marshall_buffer *m_write;
};
3186 /* start a transaction on a database */
3187 static int ctdb_transaction_destructor(struct ctdb_transaction_handle *h)
3189 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3193 /* start a transaction on a database */
3194 static int ctdb_transaction_fetch_start(struct ctdb_transaction_handle *h)
3196 struct ctdb_record_handle *rh;
3199 struct ctdb_ltdb_header header;
3200 TALLOC_CTX *tmp_ctx;
3201 const char *keyname = CTDB_TRANSACTION_LOCK_KEY;
3203 struct ctdb_db_context *ctdb_db = h->ctdb_db;
3207 key.dptr = discard_const(keyname);
3208 key.dsize = strlen(keyname);
3210 if (!ctdb_db->persistent) {
3211 DEBUG(DEBUG_ERR,(__location__ " Attempted transaction on non-persistent database\n"));
3216 tmp_ctx = talloc_new(h);
3218 rh = ctdb_fetch_lock(ctdb_db, tmp_ctx, key, NULL);
3220 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch_lock database\n"));
3221 talloc_free(tmp_ctx);
3225 status = ctdb_ctrl_transaction_active(ctdb_db->ctdb,
3229 DEBUG(DEBUG_NOTICE, (__location__ " transaction is active "
3230 "on db_id[0x%08x]. waiting for 1 second\n",
3232 talloc_free(tmp_ctx);
3238 * store the pid in the database:
3239 * it is not enough that the node is dmaster...
3242 data.dptr = (unsigned char *)&pid;
3243 data.dsize = sizeof(pid_t);
3244 ret = ctdb_ltdb_store(ctdb_db, key, &(rh->header), data);
3246 DEBUG(DEBUG_ERR, (__location__ " Failed to store pid in "
3247 "transaction record\n"));
3248 talloc_free(tmp_ctx);
3254 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
3256 DEBUG(DEBUG_ERR,(__location__ " Failed to start tdb transaction\n"));
3257 talloc_free(tmp_ctx);
3261 ret = ctdb_ltdb_fetch(ctdb_db, key, &header, tmp_ctx, &data);
3262 if (ret != 0 || header.dmaster != ctdb_db->ctdb->pnn) {
3263 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3264 talloc_free(tmp_ctx);
3268 if ((data.dsize != sizeof(pid_t)) || (*(pid_t *)(data.dptr) != pid)) {
3269 tdb_transaction_cancel(ctdb_db->ltdb->tdb);
3270 talloc_free(tmp_ctx);
3274 talloc_free(tmp_ctx);
3280 /* start a transaction on a database */
3281 struct ctdb_transaction_handle *ctdb_transaction_start(struct ctdb_db_context *ctdb_db,
3282 TALLOC_CTX *mem_ctx)
3284 struct ctdb_transaction_handle *h;
3287 h = talloc_zero(mem_ctx, struct ctdb_transaction_handle);
3289 DEBUG(DEBUG_ERR,(__location__ " oom for transaction handle\n"));
3293 h->ctdb_db = ctdb_db;
3295 ret = ctdb_transaction_fetch_start(h);
3301 talloc_set_destructor(h, ctdb_transaction_destructor);
3309 fetch a record inside a transaction
3311 int ctdb_transaction_fetch(struct ctdb_transaction_handle *h,
3312 TALLOC_CTX *mem_ctx,
3313 TDB_DATA key, TDB_DATA *data)
3315 struct ctdb_ltdb_header header;
3318 ZERO_STRUCT(header);
3320 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, mem_ctx, data);
3321 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3322 /* record doesn't exist yet */
3331 if (!h->in_replay) {
3332 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 1, key, NULL, *data);
3333 if (h->m_all == NULL) {
3334 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3343 stores a record inside a transaction
3345 int ctdb_transaction_store(struct ctdb_transaction_handle *h,
3346 TDB_DATA key, TDB_DATA data)
3348 TALLOC_CTX *tmp_ctx = talloc_new(h);
3349 struct ctdb_ltdb_header header;
3353 ZERO_STRUCT(header);
3355 /* we need the header so we can update the RSN */
3356 ret = ctdb_ltdb_fetch(h->ctdb_db, key, &header, tmp_ctx, &olddata);
3357 if (ret == -1 && header.dmaster == (uint32_t)-1) {
3358 /* the record doesn't exist - create one with us as dmaster.
3359 This is only safe because we are in a transaction and this
3360 is a persistent database */
3361 ZERO_STRUCT(header);
3362 } else if (ret != 0) {
3363 DEBUG(DEBUG_ERR,(__location__ " Failed to fetch record\n"));
3364 talloc_free(tmp_ctx);
3368 if (data.dsize == olddata.dsize &&
3369 memcmp(data.dptr, olddata.dptr, data.dsize) == 0) {
3370 /* save writing the same data */
3371 talloc_free(tmp_ctx);
3375 header.dmaster = h->ctdb_db->ctdb->pnn;
3378 if (!h->in_replay) {
3379 h->m_all = ctdb_marshall_add(h, h->m_all, h->ctdb_db->db_id, 0, key, NULL, data);
3380 if (h->m_all == NULL) {
3381 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3382 talloc_free(tmp_ctx);
3387 h->m_write = ctdb_marshall_add(h, h->m_write, h->ctdb_db->db_id, 0, key, &header, data);
3388 if (h->m_write == NULL) {
3389 DEBUG(DEBUG_ERR,(__location__ " Failed to add to marshalling record\n"));
3390 talloc_free(tmp_ctx);
3394 ret = ctdb_ltdb_store(h->ctdb_db, key, &header, data);
3396 talloc_free(tmp_ctx);
3402 replay a transaction
3404 static int ctdb_replay_transaction(struct ctdb_transaction_handle *h)
3407 struct ctdb_rec_data *rec = NULL;
3409 h->in_replay = true;
3410 talloc_free(h->m_write);
3413 ret = ctdb_transaction_fetch_start(h);
3418 for (i=0;i<h->m_all->count;i++) {
3421 rec = ctdb_marshall_loop_next(h->m_all, rec, NULL, NULL, &key, &data);
3423 DEBUG(DEBUG_ERR, (__location__ " Out of records in ctdb_replay_transaction?\n"));
3427 if (rec->reqid == 0) {
3429 if (ctdb_transaction_store(h, key, data) != 0) {
3434 TALLOC_CTX *tmp_ctx = talloc_new(h);
3436 if (ctdb_transaction_fetch(h, tmp_ctx, key, &data2) != 0) {
3437 talloc_free(tmp_ctx);
3440 if (data2.dsize != data.dsize ||
3441 memcmp(data2.dptr, data.dptr, data.dsize) != 0) {
3442 /* the record has changed on us - we have to give up */
3443 talloc_free(tmp_ctx);
3446 talloc_free(tmp_ctx);
3453 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3459 commit a transaction
3461 int ctdb_transaction_commit(struct ctdb_transaction_handle *h)
3465 struct ctdb_context *ctdb = h->ctdb_db->ctdb;
3466 struct timeval timeout;
3467 enum ctdb_controls failure_control = CTDB_CONTROL_TRANS2_ERROR;
3469 talloc_set_destructor(h, NULL);
3471 /* our commit strategy is quite complex.
3473 - we first try to commit the changes to all other nodes
3475 - if that works, then we commit locally and we are done
3477 - if a commit on another node fails, then we need to cancel
3478 the transaction, then restart the transaction (thus
3479 opening a window of time for a pending recovery to
3480 complete), then replay the transaction, checking all the
3481 reads and writes (checking that reads give the same data,
3482 and writes succeed). Then we retry the transaction to the
3487 if (h->m_write == NULL) {
3488 /* no changes were made */
3489 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3494 /* tell ctdbd to commit to the other nodes */
3495 timeout = timeval_current_ofs(1, 0);
3496 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3497 retries==0?CTDB_CONTROL_TRANS2_COMMIT:CTDB_CONTROL_TRANS2_COMMIT_RETRY, 0,
3498 ctdb_marshall_finish(h->m_write), NULL, NULL, &status,
3500 if (ret != 0 || status != 0) {
3501 tdb_transaction_cancel(h->ctdb_db->ltdb->tdb);
3502 DEBUG(DEBUG_WARNING, (__location__ " transaction commit%s failed"
3503 ", retrying after 1 second...\n",
3504 (retries==0)?"":"retry "));
3508 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3510 /* work out what error code we will give if we
3511 have to fail the operation */
3512 switch ((enum ctdb_trans2_commit_error)status) {
3513 case CTDB_TRANS2_COMMIT_SUCCESS:
3514 case CTDB_TRANS2_COMMIT_SOMEFAIL:
3515 case CTDB_TRANS2_COMMIT_TIMEOUT:
3516 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3518 case CTDB_TRANS2_COMMIT_ALLFAIL:
3519 failure_control = CTDB_CONTROL_TRANS2_FINISHED;
3524 if (++retries == 10) {
3525 DEBUG(DEBUG_ERR,(__location__ " Giving up transaction on db 0x%08x after %d retries failure_control=%u\n",
3526 h->ctdb_db->db_id, retries, (unsigned)failure_control));
3527 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3528 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3529 tdb_null, NULL, NULL, NULL, NULL, NULL);
3534 if (ctdb_replay_transaction(h) != 0) {
3535 DEBUG(DEBUG_ERR,(__location__ " Failed to replay transaction\n"));
3536 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3537 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3538 tdb_null, NULL, NULL, NULL, NULL, NULL);
3544 failure_control = CTDB_CONTROL_TRANS2_ERROR;
3547 /* do the real commit locally */
3548 ret = tdb_transaction_commit(h->ctdb_db->ltdb->tdb);
3550 DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction\n"));
3551 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3552 failure_control, CTDB_CTRL_FLAG_NOREPLY,
3553 tdb_null, NULL, NULL, NULL, NULL, NULL);
3558 /* tell ctdbd that we are finished with our local commit */
3559 ctdb_control(ctdb, CTDB_CURRENT_NODE, h->ctdb_db->db_id,
3560 CTDB_CONTROL_TRANS2_FINISHED, CTDB_CTRL_FLAG_NOREPLY,
3561 tdb_null, NULL, NULL, NULL, NULL, NULL);
3567 recovery daemon ping to main daemon
3569 int ctdb_ctrl_recd_ping(struct ctdb_context *ctdb)
3574 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_PING, 0, tdb_null,
3575 ctdb, NULL, &res, NULL, NULL);
3576 if (ret != 0 || res != 0) {
3577 DEBUG(DEBUG_ERR,("Failed to send recd ping\n"));
3584 /* when forking the main daemon and the child process needs to connect back
3585 * to the daemon as a client process, this function can be used to change
3586 * the ctdb context from daemon into client mode
3588 int switch_from_server_to_client(struct ctdb_context *ctdb)
3592 /* shutdown the transport */
3593 if (ctdb->methods) {
3594 ctdb->methods->shutdown(ctdb);
3597 /* get a new event context */
3598 talloc_free(ctdb->ev);
3599 ctdb->ev = event_context_init(ctdb);
3601 close(ctdb->daemon.sd);
3602 ctdb->daemon.sd = -1;
3604 /* the client does not need to be realtime */
3605 if (ctdb->do_setsched) {
3606 ctdb_restore_scheduler(ctdb);
3609 /* initialise ctdb */
3610 ret = ctdb_socket_connect(ctdb);
3612 DEBUG(DEBUG_ALERT, (__location__ " Failed to init ctdb client\n"));
3620 tell the main daemon we are starting a new monitor event script
3622 int ctdb_ctrl_event_script_init(struct ctdb_context *ctdb)
3627 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_INIT, 0, tdb_null,
3628 ctdb, NULL, &res, NULL, NULL);
3629 if (ret != 0 || res != 0) {
3630 DEBUG(DEBUG_ERR,("Failed to send event_script_init\n"));
3638 tell the main daemon we are starting a new monitor event script
3640 int ctdb_ctrl_event_script_finished(struct ctdb_context *ctdb)
3645 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_FINISHED, 0, tdb_null,
3646 ctdb, NULL, &res, NULL, NULL);
3647 if (ret != 0 || res != 0) {
3648 DEBUG(DEBUG_ERR,("Failed to send event_script_init\n"));
3656 tell the main daemon we are starting to run an eventscript
3658 int ctdb_ctrl_event_script_start(struct ctdb_context *ctdb, const char *name)
3664 data.dptr = discard_const(name);
3665 data.dsize = strlen(name)+1;
3667 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_START, 0, data,
3668 ctdb, NULL, &res, NULL, NULL);
3669 if (ret != 0 || res != 0) {
3670 DEBUG(DEBUG_ERR,("Failed to send event_script_start\n"));
3678 tell the main daemon the status of the script we ran
3680 int ctdb_ctrl_event_script_stop(struct ctdb_context *ctdb, int32_t result)
3686 data.dptr = (uint8_t *)&result;
3687 data.dsize = sizeof(result);
3689 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_STOP, 0, data,
3690 ctdb, NULL, &res, NULL, NULL);
3691 if (ret != 0 || res != 0) {
3692 DEBUG(DEBUG_ERR,("Failed to send event_script_stop\n"));
3700 tell the main daemon a script was disabled
3702 int ctdb_ctrl_event_script_disabled(struct ctdb_context *ctdb, const char *name)
3708 data.dptr = discard_const(name);
3709 data.dsize = strlen(name)+1;
3711 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_EVENT_SCRIPT_DISABLED, 0, data,
3712 ctdb, NULL, &res, NULL, NULL);
3713 if (ret != 0 || res != 0) {
3714 DEBUG(DEBUG_ERR,("Failed to send event_script_disabeld\n"));
3722 get the status of running the monitor eventscripts
3724 int ctdb_ctrl_getscriptstatus(struct ctdb_context *ctdb,
3725 struct timeval timeout, uint32_t destnode,
3726 TALLOC_CTX *mem_ctx,
3727 struct ctdb_monitoring_wire **script_status)
3733 ret = ctdb_control(ctdb, destnode, 0,
3734 CTDB_CONTROL_GET_EVENT_SCRIPT_STATUS, 0, tdb_null,
3735 mem_ctx, &outdata, &res, &timeout, NULL);
3736 if (ret != 0 || res != 0 || outdata.dsize == 0) {
3737 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for getscriptstatus failed ret:%d res:%d\n", ret, res));
3741 *script_status = (struct ctdb_monitoring_wire *)talloc_memdup(mem_ctx, outdata.dptr, outdata.dsize);
3742 talloc_free(outdata.dptr);
3748 tell the main daemon how long it took to lock the reclock file
3750 int ctdb_ctrl_report_recd_lock_latency(struct ctdb_context *ctdb, struct timeval timeout, double latency)
3756 data.dptr = (uint8_t *)&latency;
3757 data.dsize = sizeof(latency);
3759 ret = ctdb_control(ctdb, CTDB_CURRENT_NODE, 0, CTDB_CONTROL_RECD_RECLOCK_LATENCY, 0, data,
3760 ctdb, NULL, &res, NULL, NULL);
3761 if (ret != 0 || res != 0) {
3762 DEBUG(DEBUG_ERR,("Failed to send recd reclock latency\n"));
3770 get the name of the reclock file
3772 int ctdb_ctrl_getreclock(struct ctdb_context *ctdb, struct timeval timeout,
3773 uint32_t destnode, TALLOC_CTX *mem_ctx,
3780 ret = ctdb_control(ctdb, destnode, 0,
3781 CTDB_CONTROL_GET_RECLOCK_FILE, 0, tdb_null,
3782 mem_ctx, &data, &res, &timeout, NULL);
3783 if (ret != 0 || res != 0) {
3787 if (data.dsize == 0) {
3790 *name = talloc_strdup(mem_ctx, discard_const(data.dptr));
3792 talloc_free(data.dptr);
3798 set the reclock filename for a node
3800 int ctdb_ctrl_setreclock(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *reclock)
3806 if (reclock == NULL) {
3810 data.dsize = strlen(reclock) + 1;
3811 data.dptr = discard_const(reclock);
3814 ret = ctdb_control(ctdb, destnode, 0,
3815 CTDB_CONTROL_SET_RECLOCK_FILE, 0, data,
3816 NULL, NULL, &res, &timeout, NULL);
3817 if (ret != 0 || res != 0) {
3818 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setreclock failed\n"));
3828 int ctdb_ctrl_stop_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3833 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_STOP_NODE, 0, tdb_null,
3834 ctdb, NULL, &res, &timeout, NULL);
3835 if (ret != 0 || res != 0) {
3836 DEBUG(DEBUG_ERR,("Failed to stop node\n"));
3846 int ctdb_ctrl_continue_node(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode)
3850 ret = ctdb_control(ctdb, destnode, 0, CTDB_CONTROL_CONTINUE_NODE, 0, tdb_null,
3851 ctdb, NULL, NULL, &timeout, NULL);
3853 DEBUG(DEBUG_ERR,("Failed to continue node\n"));
3861 set the natgw state for a node
3863 int ctdb_ctrl_setnatgwstate(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t natgwstate)
3869 data.dsize = sizeof(natgwstate);
3870 data.dptr = (uint8_t *)&natgwstate;
3872 ret = ctdb_control(ctdb, destnode, 0,
3873 CTDB_CONTROL_SET_NATGWSTATE, 0, data,
3874 NULL, NULL, &res, &timeout, NULL);
3875 if (ret != 0 || res != 0) {
3876 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setnatgwstate failed\n"));
3884 set the lmaster role for a node
3886 int ctdb_ctrl_setlmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t lmasterrole)
3892 data.dsize = sizeof(lmasterrole);
3893 data.dptr = (uint8_t *)&lmasterrole;
3895 ret = ctdb_control(ctdb, destnode, 0,
3896 CTDB_CONTROL_SET_LMASTERROLE, 0, data,
3897 NULL, NULL, &res, &timeout, NULL);
3898 if (ret != 0 || res != 0) {
3899 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setlmasterrole failed\n"));
3907 set the recmaster role for a node
3909 int ctdb_ctrl_setrecmasterrole(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t recmasterrole)
3915 data.dsize = sizeof(recmasterrole);
3916 data.dptr = (uint8_t *)&recmasterrole;
3918 ret = ctdb_control(ctdb, destnode, 0,
3919 CTDB_CONTROL_SET_RECMASTERROLE, 0, data,
3920 NULL, NULL, &res, &timeout, NULL);
3921 if (ret != 0 || res != 0) {
3922 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for setrecmasterrole failed\n"));
3929 /* enable an eventscript
3931 int ctdb_ctrl_enablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
3937 data.dsize = strlen(script) + 1;
3938 data.dptr = discard_const(script);
3940 ret = ctdb_control(ctdb, destnode, 0,
3941 CTDB_CONTROL_ENABLE_SCRIPT, 0, data,
3942 NULL, NULL, &res, &timeout, NULL);
3943 if (ret != 0 || res != 0) {
3944 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for enablescript failed\n"));
3951 /* disable an eventscript
3953 int ctdb_ctrl_disablescript(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, const char *script)
3959 data.dsize = strlen(script) + 1;
3960 data.dptr = discard_const(script);
3962 ret = ctdb_control(ctdb, destnode, 0,
3963 CTDB_CONTROL_DISABLE_SCRIPT, 0, data,
3964 NULL, NULL, &res, &timeout, NULL);
3965 if (ret != 0 || res != 0) {
3966 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for disablescript failed\n"));
3974 int ctdb_ctrl_set_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_ban_time *bantime)
3980 data.dsize = sizeof(*bantime);
3981 data.dptr = (uint8_t *)bantime;
3983 ret = ctdb_control(ctdb, destnode, 0,
3984 CTDB_CONTROL_SET_BAN_STATE, 0, data,
3985 NULL, NULL, &res, &timeout, NULL);
3986 if (ret != 0 || res != 0) {
3987 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
3995 int ctdb_ctrl_get_ban(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, TALLOC_CTX *mem_ctx, struct ctdb_ban_time **bantime)
4000 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4002 ret = ctdb_control(ctdb, destnode, 0,
4003 CTDB_CONTROL_GET_BAN_STATE, 0, tdb_null,
4004 tmp_ctx, &outdata, &res, &timeout, NULL);
4005 if (ret != 0 || res != 0) {
4006 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set ban state failed\n"));
4007 talloc_free(tmp_ctx);
4011 *bantime = (struct ctdb_ban_time *)talloc_steal(mem_ctx, outdata.dptr);
4012 talloc_free(tmp_ctx);
4018 int ctdb_ctrl_set_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, struct ctdb_db_priority *db_prio)
4023 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4025 data.dptr = (uint8_t*)db_prio;
4026 data.dsize = sizeof(*db_prio);
4028 ret = ctdb_control(ctdb, destnode, 0,
4029 CTDB_CONTROL_SET_DB_PRIORITY, 0, data,
4030 tmp_ctx, NULL, &res, &timeout, NULL);
4031 if (ret != 0 || res != 0) {
4032 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4033 talloc_free(tmp_ctx);
4037 talloc_free(tmp_ctx);
4042 int ctdb_ctrl_get_db_priority(struct ctdb_context *ctdb, struct timeval timeout, uint32_t destnode, uint32_t db_id, uint32_t *priority)
4047 TALLOC_CTX *tmp_ctx = talloc_new(NULL);
4049 data.dptr = (uint8_t*)&db_id;
4050 data.dsize = sizeof(db_id);
4052 ret = ctdb_control(ctdb, destnode, 0,
4053 CTDB_CONTROL_GET_DB_PRIORITY, 0, data,
4054 tmp_ctx, NULL, &res, &timeout, NULL);
4055 if (ret != 0 || res < 0) {
4056 DEBUG(DEBUG_ERR,(__location__ " ctdb_control for set_db_priority failed\n"));
4057 talloc_free(tmp_ctx);
4065 talloc_free(tmp_ctx);