4 Copyright (C) Rusty Russell 2010
5 Copyright (C) Ronnie Sahlberg 2011
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 #include <sys/socket.h>
28 #include <sys/ioctl.h>
29 #include "libctdb_private.h"
31 #include "local_tdb.h"
33 #include <dlinklist.h>
34 #include <ctdb_protocol.h>
36 /* Remove type-safety macros. */
37 #undef ctdb_attachdb_send
38 #undef ctdb_readrecordlock_async
39 #undef ctdb_readonlyrecordlock_async
43 struct ctdb_lock *next, *prev;
45 struct ctdb_db *ctdb_db;
48 /* Is this a request for read-only lock ? */
51 /* This will always be set by the time user sees this. */
52 unsigned long held_magic;
53 struct ctdb_ltdb_header *hdr;
55 /* For convenience, we stash original callback here. */
56 ctdb_rrl_callback_t callback;
60 struct ctdb_connection *ctdb;
64 struct tdb_context *tdb;
66 ctdb_callback_t callback;
/* Unlink @lock from the list of locks kept on the connection
 * (ctdb->locks). */
70 static void remove_lock(struct ctdb_connection *ctdb, struct ctdb_lock *lock)
72 DLIST_REMOVE(ctdb->locks, lock);
75 /* FIXME: for thread safety, need tid info too. */
76 static bool holding_lock(struct ctdb_connection *ctdb)
78 /* For the moment, you can't ever hold more than 1 lock. */
79 return (ctdb->locks != NULL);
/* Link @lock into the list of locks kept on the connection
 * (ctdb->locks). */
82 static void add_lock(struct ctdb_connection *ctdb, struct ctdb_lock *lock)
84 DLIST_ADD(ctdb->locks, lock);
87 static void cleanup_locks(struct ctdb_connection *ctdb, struct ctdb_db *db)
89 struct ctdb_lock *i, *next;
91 for (i = ctdb->locks; i; i = next) {
92 /* Grab next pointer, as release_lock will free i */
94 if (i->ctdb_db == db) {
95 ctdb_release_lock(db, i);
100 /* FIXME: Could be in shared util code with rest of ctdb */
101 static void close_noerr(int fd)
108 /* FIXME: Could be in shared util code with rest of ctdb */
109 static void free_noerr(void *p)
/* FIXME: Could be in shared util code with rest of ctdb */
/*
 * Put @fd into non-blocking mode (best effort, no error is reported).
 *
 * The current file status flags are read with F_GETFL and written back
 * with O_NONBLOCK added.  If the flags cannot be read (for instance @fd
 * is not an open descriptor) nothing is changed: OR-ing O_NONBLOCK into
 * the -1 failure value would ask F_SETFL to switch on every flag bit.
 */
static void set_nonblocking(int fd)
{
	int flags = fcntl(fd, F_GETFL, 0);

	if (flags == -1) {
		return;
	}
	fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
/* FIXME: Could be in shared util code with rest of ctdb */
/*
 * Mark @fd close-on-exec (best effort, no error is reported).
 *
 * The descriptor flags are read with F_GETFD and written back with
 * FD_CLOEXEC added.  If they cannot be read (for instance @fd is not an
 * open descriptor) nothing is changed, rather than feeding the -1
 * failure value back into F_SETFD.
 */
static void set_close_on_exec(int fd)
{
	int flags = fcntl(fd, F_GETFD, 0);

	if (flags == -1) {
		return;
	}
	fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
132 static void set_pnn(struct ctdb_connection *ctdb,
133 struct ctdb_request *req,
136 if (!ctdb_getpnn_recv(ctdb, req, &ctdb->pnn)) {
137 DEBUG(ctdb, LOG_CRIT,
138 "ctdb_connect(async): failed to get pnn");
141 ctdb_request_free(req);
144 struct ctdb_connection *ctdb_connect(const char *addr,
145 ctdb_log_fn_t log_fn, void *log_priv)
147 struct ctdb_connection *ctdb;
148 struct sockaddr_un sun;
150 ctdb = malloc(sizeof(*ctdb));
152 /* With no format string, we hope it doesn't use ap! */
154 memset(&ap, 0, sizeof(ap));
156 log_fn(log_priv, LOG_ERR, "ctdb_connect: no memory", ap);
163 ctdb->inqueue = NULL;
164 ctdb->message_handlers = NULL;
166 ctdb->broken = false;
168 ctdb->log_priv = log_priv;
171 memset(&sun, 0, sizeof(sun));
172 sun.sun_family = AF_UNIX;
175 strncpy(sun.sun_path, addr, sizeof(sun.sun_path)-1);
176 ctdb->fd = socket(AF_UNIX, SOCK_STREAM, 0);
180 set_nonblocking(ctdb->fd);
181 set_close_on_exec(ctdb->fd);
183 if (connect(ctdb->fd, (struct sockaddr *)&sun, sizeof(sun)) == -1)
186 /* Immediately queue a request to get our pnn. */
187 if (!ctdb_getpnn_send(ctdb, CTDB_CURRENT_NODE, set_pnn, NULL))
193 close_noerr(ctdb->fd);
200 void ctdb_disconnect(struct ctdb_connection *ctdb)
202 struct ctdb_request *i;
204 DEBUG(ctdb, LOG_DEBUG, "ctdb_disconnect");
206 while ((i = ctdb->outq) != NULL) {
207 DLIST_REMOVE(ctdb->outq, i);
208 ctdb_request_free(i);
211 while ((i = ctdb->doneq) != NULL) {
212 DLIST_REMOVE(ctdb->doneq, i);
213 ctdb_request_free(i);
217 free_io_elem(ctdb->in);
219 remove_message_handlers(ctdb);
222 /* Just in case they try to reuse */
227 int ctdb_get_fd(struct ctdb_connection *ctdb)
232 int ctdb_which_events(struct ctdb_connection *ctdb)
241 struct ctdb_request *new_ctdb_request(struct ctdb_connection *ctdb, size_t len,
242 ctdb_callback_t cb, void *cbdata)
244 struct ctdb_request *req = malloc(sizeof(*req));
247 req->io = new_io_elem(len);
253 req->hdr.hdr = io_elem_data(req->io, NULL);
256 req->priv_data = cbdata;
258 req->extra_destructor = NULL;
262 void ctdb_request_free(struct ctdb_request *req)
264 struct ctdb_connection *ctdb = req->ctdb;
266 if (req->next || req->prev) {
267 DEBUG(ctdb, LOG_ALERT,
268 "ctdb_request_free: request not complete! ctdb_cancel? %p (id %u)",
269 req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
270 ctdb_cancel(ctdb, req);
273 if (req->extra_destructor) {
274 req->extra_destructor(ctdb, req);
277 free_io_elem(req->reply);
279 free_io_elem(req->io);
283 /* Sanity-checking wrapper for reply. */
284 static struct ctdb_reply_call *unpack_reply_call(struct ctdb_request *req,
288 struct ctdb_reply_call *inhdr = io_elem_data(req->reply, &len);
290 /* Library user error if this isn't a reply to a call. */
291 if (req->hdr.hdr->operation != CTDB_REQ_CALL) {
293 DEBUG(req->ctdb, LOG_ALERT,
294 "This was not a ctdbd call request: operation %u",
295 req->hdr.hdr->operation);
299 if (req->hdr.call->callid != callid) {
301 DEBUG(req->ctdb, LOG_ALERT,
302 "This was not a ctdbd %u call request: %u",
303 callid, req->hdr.call->callid);
307 /* ctdbd or our error if this isn't a reply call. */
308 if (len < sizeof(*inhdr) || inhdr->hdr.operation != CTDB_REPLY_CALL) {
310 DEBUG(req->ctdb, LOG_CRIT,
311 "Invalid ctdbd call reply: len %zu, operation %u",
312 len, inhdr->hdr.operation);
319 /* Sanity-checking wrapper for reply. */
320 struct ctdb_reply_control *unpack_reply_control(struct ctdb_request *req,
321 enum ctdb_controls control)
324 struct ctdb_reply_control *inhdr = io_elem_data(req->reply, &len);
326 /* Library user error if this isn't a reply to a call. */
327 if (len < sizeof(*inhdr)) {
329 DEBUG(req->ctdb, LOG_ALERT,
330 "Short ctdbd control reply: %zu bytes", len);
333 if (req->hdr.hdr->operation != CTDB_REQ_CONTROL) {
335 DEBUG(req->ctdb, LOG_ALERT,
336 "This was not a ctdbd control request: operation %u",
337 req->hdr.hdr->operation);
341 /* ... or if it was a different control from what we expected. */
342 if (req->hdr.control->opcode != control) {
344 DEBUG(req->ctdb, LOG_ALERT,
345 "This was not an opcode %u ctdbd control request: %u",
346 control, req->hdr.control->opcode);
350 /* ctdbd or our error if this isn't a reply call. */
351 if (inhdr->hdr.operation != CTDB_REPLY_CONTROL) {
353 DEBUG(req->ctdb, LOG_CRIT,
354 "Invalid ctdbd control reply: operation %u",
355 inhdr->hdr.operation);
362 static void handle_incoming(struct ctdb_connection *ctdb, struct io_elem *in)
364 struct ctdb_req_header *hdr;
366 struct ctdb_request *i;
368 hdr = io_elem_data(in, &len);
369 /* FIXME: use len to check packet! */
371 if (hdr->operation == CTDB_REQ_MESSAGE) {
372 deliver_message(ctdb, hdr);
376 for (i = ctdb->doneq; i; i = i->next) {
377 if (i->hdr.hdr->reqid == hdr->reqid) {
378 DLIST_REMOVE(ctdb->doneq, i);
380 i->callback(ctdb, i, i->priv_data);
384 DEBUG(ctdb, LOG_WARNING,
385 "Unexpected ctdbd request reply: operation %u reqid %u",
386 hdr->operation, hdr->reqid);
/*
 * Remove "harmless" errors.
 *
 * @ret is the result of a read/write style call.  A negative @ret whose
 * errno only says "interrupted" or "would block" is mapped to 0;
 * every other value is passed through unchanged.
 *
 * EAGAIN is checked alongside EWOULDBLOCK because POSIX allows the two
 * to be distinct values and a non-blocking descriptor may report either.
 */
static ssize_t real_error(ssize_t ret)
{
	if (ret >= 0) {
		return ret;
	}
	if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
		return 0;
	}
	return ret;
}
398 bool ctdb_service(struct ctdb_connection *ctdb, int revents)
404 if (holding_lock(ctdb)) {
405 DEBUG(ctdb, LOG_ALERT, "Do not block while holding lock!");
408 if (revents & POLLOUT) {
410 if (real_error(write_io_elem(ctdb->fd,
411 ctdb->outq->io)) < 0) {
413 "ctdb_service: error writing to ctdbd");
417 if (io_elem_finished(ctdb->outq->io)) {
418 struct ctdb_request *done = ctdb->outq;
419 DLIST_REMOVE(ctdb->outq, done);
420 /* We add at the head: any dead ones
422 DLIST_ADD(ctdb->doneq, done);
427 while (revents & POLLIN) {
431 if (ioctl(ctdb->fd, FIONREAD, &num_ready) != 0) {
433 "ctdb_service: ioctl(FIONREAD) %d", errno);
437 if (num_ready == 0) {
438 /* the descriptor has been closed or we have all our data */
444 ctdb->in = new_io_elem(sizeof(struct ctdb_req_header));
447 "ctdb_service: allocating readbuf");
453 ret = read_io_elem(ctdb->fd, ctdb->in);
454 if (real_error(ret) < 0 || ret == 0) {
455 /* They closed fd? */
459 "ctdb_service: error reading from ctdbd");
462 } else if (ret < 0) {
463 /* No progress, stop loop. */
465 } else if (io_elem_finished(ctdb->in)) {
466 io_elem_queue(ctdb, ctdb->in);
472 while (ctdb->inqueue != NULL) {
473 struct io_elem *io = ctdb->inqueue;
475 io_elem_dequeue(ctdb, io);
476 handle_incoming(ctdb, io);
482 /* This is inefficient. We could pull in idtree.c. */
483 static bool reqid_used(const struct ctdb_connection *ctdb, uint32_t reqid)
485 struct ctdb_request *i;
487 for (i = ctdb->outq; i; i = i->next) {
488 if (i->hdr.hdr->reqid == reqid) {
492 for (i = ctdb->doneq; i; i = i->next) {
493 if (i->hdr.hdr->reqid == reqid) {
500 uint32_t new_reqid(struct ctdb_connection *ctdb)
502 while (reqid_used(ctdb, ctdb->next_id)) {
505 return ctdb->next_id++;
508 struct ctdb_request *new_ctdb_control_request(struct ctdb_connection *ctdb,
511 const void *extra_data,
513 ctdb_callback_t callback,
516 struct ctdb_request *req;
517 struct ctdb_req_control *pkt;
519 req = new_ctdb_request(
520 ctdb, offsetof(struct ctdb_req_control, data) + extra,
525 io_elem_init_req_header(req->io,
526 CTDB_REQ_CONTROL, destnode, new_reqid(ctdb));
528 pkt = req->hdr.control;
530 pkt->opcode = opcode;
534 pkt->datalen = extra;
535 memcpy(pkt->data, extra_data, extra);
536 DLIST_ADD(ctdb->outq, req);
540 void ctdb_cancel_callback(struct ctdb_connection *ctdb,
541 struct ctdb_request *req,
544 ctdb_request_free(req);
547 void ctdb_cancel(struct ctdb_connection *ctdb, struct ctdb_request *req)
549 if (!req->next && !req->prev) {
550 DEBUG(ctdb, LOG_ALERT,
551 "ctdb_cancel: request completed! ctdb_request_free? %p (id %u)",
552 req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
553 ctdb_request_free(req);
557 DEBUG(ctdb, LOG_DEBUG, "ctdb_cancel: %p (id %u)",
558 req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
560 /* FIXME: If it's not sent, we could just free it right now. */
561 req->callback = ctdb_cancel_callback;
564 void ctdb_detachdb(struct ctdb_connection *ctdb, struct ctdb_db *db)
566 cleanup_locks(ctdb, db);
571 static void destroy_req_db(struct ctdb_connection *ctdb,
572 struct ctdb_request *req);
573 static void attachdb_done(struct ctdb_connection *ctdb,
574 struct ctdb_request *req,
576 static void attachdb_getdbpath_done(struct ctdb_connection *ctdb,
577 struct ctdb_request *req,
580 struct ctdb_request *
581 ctdb_attachdb_send(struct ctdb_connection *ctdb,
582 const char *name, bool persistent, uint32_t tdb_flags,
583 ctdb_callback_t callback, void *private_data)
585 struct ctdb_request *req;
589 /* FIXME: Search if db already open. */
590 db = malloc(sizeof(*db));
596 opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
598 opcode = CTDB_CONTROL_DB_ATTACH;
601 req = new_ctdb_control_request(ctdb, opcode, CTDB_CURRENT_NODE, name,
602 strlen(name) + 1, attachdb_done, db);
605 "ctdb_attachdb_send: failed allocating DB_ATTACH");
611 db->tdb_flags = tdb_flags;
612 db->persistent = persistent;
613 db->callback = callback;
614 db->private_data = private_data;
616 req->extra_destructor = destroy_req_db;
617 /* This is set non-NULL when we succeed, see ctdb_attachdb_recv */
620 /* Flags get overloaded into srvid. */
621 req->hdr.control->srvid = tdb_flags;
622 DEBUG(db->ctdb, LOG_DEBUG,
623 "ctdb_attachdb_send: DB_ATTACH request %p", req);
627 static void destroy_req_db(struct ctdb_connection *ctdb,
628 struct ctdb_request *req)
630 /* Incomplete db is in priv_data. */
631 free(req->priv_data);
632 /* second request is chained off this one. */
634 ctdb_request_free(req->extra);
638 static void attachdb_done(struct ctdb_connection *ctdb,
639 struct ctdb_request *req,
642 struct ctdb_db *db = _db;
643 struct ctdb_request *req2;
644 struct ctdb_reply_control *reply;
645 enum ctdb_controls control = CTDB_CONTROL_DB_ATTACH;
647 if (db->persistent) {
648 control = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
651 reply = unpack_reply_control(req, control);
652 if (!reply || reply->status != 0) {
655 "ctdb_attachdb_send(async): DB_ATTACH status %i",
658 /* We failed. Hand request to user and have them discover it
659 * via ctdb_attachdb_recv. */
660 db->callback(ctdb, req, db->private_data);
663 db->id = *(uint32_t *)reply->data;
665 /* Now we do another call, to get the dbpath. */
666 req2 = new_ctdb_control_request(db->ctdb, CTDB_CONTROL_GETDBPATH,
668 &db->id, sizeof(db->id),
669 attachdb_getdbpath_done, db);
671 DEBUG(db->ctdb, LOG_ERR,
672 "ctdb_attachdb_send(async): failed to allocate");
673 db->callback(ctdb, req, db->private_data);
678 DEBUG(db->ctdb, LOG_DEBUG,
679 "ctdb_attachdb_send(async): created getdbpath request");
682 static void attachdb_getdbpath_done(struct ctdb_connection *ctdb,
683 struct ctdb_request *req,
686 struct ctdb_db *db = _db;
688 /* Do callback on original request. */
689 db->callback(ctdb, req->extra, db->private_data);
692 struct ctdb_db *ctdb_attachdb_recv(struct ctdb_connection *ctdb,
693 struct ctdb_request *req)
695 struct ctdb_request *dbpath_req = req->extra;
696 struct ctdb_reply_control *reply;
697 struct ctdb_db *db = req->priv_data;
698 uint32_t tdb_flags = db->tdb_flags;
699 struct tdb_logging_context log;
701 /* Never sent the dbpath request? We've failed. */
703 /* FIXME: Save errno? */
708 reply = unpack_reply_control(dbpath_req, CTDB_CONTROL_GETDBPATH);
712 if (reply->status != 0) {
713 DEBUG(db->ctdb, LOG_ERR,
714 "ctdb_attachdb_recv: reply status %i", reply->status);
718 tdb_flags = db->persistent ? TDB_DEFAULT : TDB_NOSYNC;
719 tdb_flags |= TDB_DISALLOW_NESTING;
721 log.log_fn = ctdb_tdb_log_bridge;
722 log.log_private = ctdb;
723 db->tdb = tdb_open_ex((char *)reply->data, 0, tdb_flags, O_RDWR, 0,
725 if (db->tdb == NULL) {
726 DEBUG(db->ctdb, LOG_ERR,
727 "ctdb_attachdb_recv: failed to tdb_open %s",
728 (char *)reply->data);
732 /* Finally, separate the db from the request (see destroy_req_db). */
733 req->priv_data = NULL;
734 DEBUG(db->ctdb, LOG_DEBUG,
735 "ctdb_attachdb_recv: db %p, tdb %s", db, (char *)reply->data);
739 static unsigned long lock_magic(struct ctdb_lock *lock)
741 /* A non-zero magic specific to this structure. */
742 return ((unsigned long)lock->key.dptr
743 ^ (((unsigned long)lock->key.dptr) << 16)
744 ^ 0xBADC0FFEEBADC0DEULL)
748 /* This is only called on locks before they're held. */
749 static void free_lock(struct ctdb_lock *lock)
751 if (lock->held_magic) {
752 DEBUG(lock->ctdb_db->ctdb, LOG_ALERT,
753 "free_lock invalid lock %p", lock);
760 void ctdb_release_lock(struct ctdb_db *ctdb_db, struct ctdb_lock *lock)
762 if (lock->held_magic != lock_magic(lock)) {
763 DEBUG(lock->ctdb_db->ctdb, LOG_ALERT,
764 "ctdb_release_lock invalid lock %p", lock);
765 } else if (lock->ctdb_db != ctdb_db) {
767 DEBUG(ctdb_db->ctdb, LOG_ALERT,
768 "ctdb_release_lock: wrong ctdb_db.");
770 tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
771 DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
772 "ctdb_release_lock %p", lock);
773 remove_lock(lock->ctdb_db->ctdb, lock);
775 lock->held_magic = 0;
780 /* We keep the lock if local node is the dmaster. */
781 static bool try_readrecordlock(struct ctdb_lock *lock, TDB_DATA *data)
783 struct ctdb_ltdb_header *hdr;
785 if (tdb_chainlock(lock->ctdb_db->tdb, lock->key) != 0) {
786 DEBUG(lock->ctdb_db->ctdb, LOG_WARNING,
787 "ctdb_readrecordlock_async: failed to chainlock");
791 hdr = ctdb_local_fetch(lock->ctdb_db->tdb, lock->key, data);
792 if (hdr && lock->readonly && (hdr->flags & CTDB_REC_RO_HAVE_READONLY) ) {
793 DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
794 "ctdb_readrecordlock_async: got local lock for ro");
795 lock->held_magic = lock_magic(lock);
797 add_lock(lock->ctdb_db->ctdb, lock);
800 if (hdr && hdr->dmaster == lock->ctdb_db->ctdb->pnn) {
801 DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
802 "ctdb_readrecordlock_async: got local lock");
803 lock->held_magic = lock_magic(lock);
805 add_lock(lock->ctdb_db->ctdb, lock);
809 /* we dont have the record locally,
810 * drop to writelock to force a migration
812 if (!hdr && lock->readonly) {
813 lock->readonly = false;
816 tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
821 /* If they shutdown before we hand them the lock, we free it here. */
/* Installed as req->extra_destructor by ctdb_readrecordlock_internal;
 * passes the lock stored in req->extra to free_lock().  @ctdb is not
 * used. */
822 static void destroy_lock(struct ctdb_connection *ctdb,
823 struct ctdb_request *req)
825 free_lock(req->extra);
828 static void readrecordlock_retry(struct ctdb_connection *ctdb,
829 struct ctdb_request *req, void *private)
831 struct ctdb_lock *lock = req->extra;
832 struct ctdb_reply_call *reply;
835 /* OK, we've received reply to fetch-with-header migration */
836 reply = unpack_reply_call(req, CTDB_FETCH_WITH_HEADER_FUNC);
837 if (!reply || reply->status != 0) {
840 "ctdb_readrecordlock_async(async):"
841 " FETCH_WITH_HEADER_FUNC returned %i", reply->status);
843 lock->callback(lock->ctdb_db, NULL, tdb_null, private);
844 ctdb_request_free(req); /* Also frees lock. */
848 /* Can we get lock now? */
849 if (try_readrecordlock(lock, &data)) {
850 /* Now it's their responsibility to free lock & request! */
851 req->extra_destructor = NULL;
852 lock->callback(lock->ctdb_db, lock, data, private);
853 ctdb_request_free(req);
857 /* Retransmit the same request again (we lost race). */
858 io_elem_reset(req->io);
859 DLIST_ADD(ctdb->outq, req);
863 ctdb_readrecordlock_internal(struct ctdb_db *ctdb_db, TDB_DATA key,
865 ctdb_rrl_callback_t callback, void *cbdata)
867 struct ctdb_request *req;
868 struct ctdb_lock *lock;
871 if (holding_lock(ctdb_db->ctdb)) {
872 DEBUG(ctdb_db->ctdb, LOG_ALERT,
873 "ctdb_readrecordlock_async: already holding lock");
878 lock = malloc(sizeof(*lock) + key.dsize);
880 DEBUG(ctdb_db->ctdb, LOG_ERR,
881 "ctdb_readrecordlock_async: lock allocation failed");
884 lock->key.dptr = (void *)(lock + 1);
885 memcpy(lock->key.dptr, key.dptr, key.dsize);
886 lock->key.dsize = key.dsize;
887 lock->ctdb_db = ctdb_db;
889 lock->held_magic = 0;
890 lock->readonly = readonly;
893 if (try_readrecordlock(lock, &data)) {
894 callback(ctdb_db, lock, data, cbdata);
898 /* Slow path: create request. */
899 req = new_ctdb_request(
901 offsetof(struct ctdb_req_call, data) + key.dsize,
902 readrecordlock_retry, cbdata);
904 DEBUG(ctdb_db->ctdb, LOG_ERR,
905 "ctdb_readrecordlock_async: allocation failed");
910 req->extra_destructor = destroy_lock;
911 /* We store the original callback in the lock, and use our own. */
912 lock->callback = callback;
914 io_elem_init_req_header(req->io, CTDB_REQ_CALL, CTDB_CURRENT_NODE,
915 new_reqid(ctdb_db->ctdb));
917 if (lock->readonly) {
918 req->hdr.call->flags = CTDB_WANT_READONLY;
920 req->hdr.call->flags = CTDB_IMMEDIATE_MIGRATION;
922 req->hdr.call->db_id = ctdb_db->id;
923 req->hdr.call->callid = CTDB_FETCH_WITH_HEADER_FUNC;
924 req->hdr.call->hopcount = 0;
925 req->hdr.call->keylen = key.dsize;
926 req->hdr.call->calldatalen = 0;
927 memcpy(req->hdr.call->data, key.dptr, key.dsize);
928 DLIST_ADD(ctdb_db->ctdb->outq, req);
933 ctdb_readrecordlock_async(struct ctdb_db *ctdb_db, TDB_DATA key,
934 ctdb_rrl_callback_t callback, void *cbdata)
936 return ctdb_readrecordlock_internal(ctdb_db, key,
942 ctdb_readonlyrecordlock_async(struct ctdb_db *ctdb_db, TDB_DATA key,
943 ctdb_rrl_callback_t callback, void *cbdata)
945 return ctdb_readrecordlock_internal(ctdb_db, key,
950 bool ctdb_writerecord(struct ctdb_db *ctdb_db,
951 struct ctdb_lock *lock, TDB_DATA data)
953 if (lock->readonly) {
955 DEBUG(ctdb_db->ctdb, LOG_ALERT,
956 "ctdb_writerecord: Can not write, read-only record.");
960 if (lock->ctdb_db != ctdb_db) {
962 DEBUG(ctdb_db->ctdb, LOG_ALERT,
963 "ctdb_writerecord: Can not write, wrong ctdb_db.");
967 if (lock->held_magic != lock_magic(lock)) {
969 DEBUG(ctdb_db->ctdb, LOG_ALERT,
970 "ctdb_writerecord: Can not write. Lock has been released.");
974 if (ctdb_db->persistent) {
976 DEBUG(ctdb_db->ctdb, LOG_ALERT,
977 "ctdb_writerecord: cannot write to persistent db");
981 switch (ctdb_local_store(ctdb_db->tdb, lock->key, lock->hdr, data)) {
983 DEBUG(ctdb_db->ctdb, LOG_DEBUG,
984 "ctdb_writerecord: optimized away noop write.");
992 DEBUG(ctdb_db->ctdb, LOG_CRIT,
993 "ctdb_writerecord: out of memory.");
996 DEBUG(ctdb_db->ctdb, LOG_ALERT,
997 "ctdb_writerecord: record changed under lock?");
999 default: /* TDB already logged. */
1007 struct ctdb_traverse_state {
1008 struct ctdb_request *handle;
1009 struct ctdb_db *ctdb_db;
1012 ctdb_traverse_callback_t callback;
1016 static void traverse_remhnd_cb(struct ctdb_connection *ctdb,
1017 struct ctdb_request *req, void *private_data)
1019 struct ctdb_traverse_state *state = private_data;
1021 if (!ctdb_remove_message_handler_recv(ctdb, state->handle)) {
1022 DEBUG(ctdb, LOG_ERR,
1023 "Failed to remove message handler for"
1025 state->callback(state->ctdb_db->ctdb, state->ctdb_db,
1026 TRAVERSE_STATUS_ERROR,
1030 ctdb_request_free(state->handle);
1031 state->handle = NULL;
1035 static void msg_h(struct ctdb_connection *ctdb, uint64_t srvid,
1036 TDB_DATA data, void *private_data)
1038 struct ctdb_traverse_state *state = private_data;
1039 struct ctdb_db *ctdb_db = state->ctdb_db;
1040 struct ctdb_rec_data *d = (struct ctdb_rec_data *)data.dptr;
1043 if (data.dsize < sizeof(uint32_t) ||
1044 d->length != data.dsize) {
1045 DEBUG(ctdb, LOG_ERR,
1046 "Bad data size %u in traverse_handler",
1047 (unsigned)data.dsize);
1048 state->callback(state->ctdb_db->ctdb, state->ctdb_db,
1049 TRAVERSE_STATUS_ERROR,
1052 state->handle = ctdb_remove_message_handler_send(
1053 state->ctdb_db->ctdb, state->srvid,
1055 traverse_remhnd_cb, state);
1059 key.dsize = d->keylen;
1060 key.dptr = &d->data[0];
1061 data.dsize = d->datalen;
1062 data.dptr = &d->data[d->keylen];
1064 if (key.dsize == 0 && data.dsize == 0) {
1065 state->callback(state->ctdb_db->ctdb, state->ctdb_db,
1066 TRAVERSE_STATUS_FINISHED,
1069 state->handle = ctdb_remove_message_handler_send(
1070 state->ctdb_db->ctdb, state->srvid,
1072 traverse_remhnd_cb, state);
1076 if (data.dsize <= sizeof(struct ctdb_ltdb_header)) {
1077 /* empty records are deleted records in ctdb */
1081 data.dsize -= sizeof(struct ctdb_ltdb_header);
1082 data.dptr += sizeof(struct ctdb_ltdb_header);
1084 if (state->callback(ctdb, ctdb_db,
1085 TRAVERSE_STATUS_RECORD,
1086 key, data, state->cbdata) != 0) {
1087 state->handle = ctdb_remove_message_handler_send(
1088 state->ctdb_db->ctdb, state->srvid,
1090 traverse_remhnd_cb, state);
/* Callback passed with the CTDB_CONTROL_TRAVERSE_START control request
 * (see traverse_msghnd_cb).  @private_data is the traverse state; the
 * request held in state->handle is freed and the handle cleared. */
1095 static void traverse_start_cb(struct ctdb_connection *ctdb,
1096 struct ctdb_request *req, void *private_data)
1098 struct ctdb_traverse_state *state = private_data;
1100 ctdb_request_free(state->handle);
1101 state->handle = NULL;
1104 static void traverse_msghnd_cb(struct ctdb_connection *ctdb,
1105 struct ctdb_request *req, void *private_data)
1107 struct ctdb_traverse_state *state = private_data;
1108 struct ctdb_db *ctdb_db = state->ctdb_db;
1109 struct ctdb_traverse_start t;
1111 if (!ctdb_set_message_handler_recv(ctdb, state->handle)) {
1112 DEBUG(ctdb, LOG_ERR,
1113 "Failed to register message handler for"
1115 state->callback(state->ctdb_db->ctdb, state->ctdb_db,
1116 TRAVERSE_STATUS_ERROR,
1119 ctdb_request_free(state->handle);
1120 state->handle = NULL;
1124 ctdb_request_free(state->handle);
1125 state->handle = NULL;
1127 t.db_id = ctdb_db->id;
1128 t.srvid = state->srvid;
1131 state->handle = new_ctdb_control_request(ctdb,
1132 CTDB_CONTROL_TRAVERSE_START,
1135 traverse_start_cb, state);
1136 if (state->handle == NULL) {
1137 DEBUG(ctdb, LOG_ERR,
1138 "ctdb_traverse_async:"
1139 " failed to send traverse_start control");
1140 state->callback(state->ctdb_db->ctdb, state->ctdb_db,
1141 TRAVERSE_STATUS_ERROR,
1144 state->handle = ctdb_remove_message_handler_send(
1145 state->ctdb_db->ctdb, state->srvid,
1147 traverse_remhnd_cb, state);
1152 bool ctdb_traverse_async(struct ctdb_db *ctdb_db,
1153 ctdb_traverse_callback_t callback, void *cbdata)
1155 struct ctdb_connection *ctdb = ctdb_db->ctdb;
1156 struct ctdb_traverse_state *state;
1157 static uint32_t tid = 0;
1159 state = malloc(sizeof(struct ctdb_traverse_state));
1160 if (state == NULL) {
1161 DEBUG(ctdb, LOG_ERR,
1162 "ctdb_traverse_async: no memory."
1163 " allocate state failed");
1168 state->srvid = CTDB_SRVID_TRAVERSE_RANGE|tid;
1170 state->callback = callback;
1171 state->cbdata = cbdata;
1172 state->ctdb_db = ctdb_db;
1174 state->handle = ctdb_set_message_handler_send(ctdb_db->ctdb,
1177 traverse_msghnd_cb, state);
1178 if (state->handle == NULL) {
1179 DEBUG(ctdb, LOG_ERR,
1180 "ctdb_traverse_async:"
1181 " failed ctdb_set_message_handler_send");
1189 int ctdb_num_out_queue(struct ctdb_connection *ctdb)
1191 struct ctdb_request *req;
1194 for (i = 0, req = ctdb->outq; req; req = req->next, i++)
1200 int ctdb_num_in_flight(struct ctdb_connection *ctdb)
1202 struct ctdb_request *req;
1205 for (i = 0, req = ctdb->doneq; req; req = req->next, i++)
/*
 * Total number of requests on the connection: those counted by
 * ctdb_num_out_queue() plus those counted by ctdb_num_in_flight().
 */
int ctdb_num_active(struct ctdb_connection *ctdb)
{
	int queued = ctdb_num_out_queue(ctdb);
	int in_flight = ctdb_num_in_flight(ctdb);

	return queued + in_flight;
}