4 Copyright (C) Rusty Russell 2010
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 #include <sys/socket.h>
27 #include "libctdb_private.h"
29 #include "local_tdb.h"
31 #include <dlinklist.h>
32 #include <ctdb_protocol.h>
34 /* Remove type-safety macros. */
35 #undef ctdb_attachdb_send
36 #undef ctdb_readrecordlock_async
39 /* FIXME: Could be in shared util code with rest of ctdb */
/* Called by ctdb_connect() on ctdb->fd.
 * NOTE(review): the body is not visible in this view; the _noerr suffix
 * suggests it closes fd while leaving errno as it was - confirm. */
40 static void close_noerr(int fd)
47 /* FIXME: Could be in shared util code with rest of ctdb */
/* Takes an arbitrary pointer p.
 * NOTE(review): the body is not visible in this view; the name suggests
 * free(p) with errno left as it was - confirm. */
48 static void free_noerr(void *p)
/* FIXME: Could be in shared util code with rest of ctdb */
/*
 * Put fd into non-blocking mode while keeping its other status flags.
 *
 * Best effort: nothing is reported to the caller.  If the current flags
 * cannot be read, fd is left untouched instead of writing back a flag
 * word built from the -1 failure value.
 */
static void set_nonblocking(int fd)
{
	int v;

	v = fcntl(fd, F_GETFL, 0);
	if (v == -1) {
		return;
	}
	fcntl(fd, F_SETFL, v | O_NONBLOCK);
}
/* FIXME: Could be in shared util code with rest of ctdb */
/*
 * Mark fd close-on-exec while keeping its other descriptor flags.
 *
 * Best effort: nothing is reported to the caller.  If the current flags
 * cannot be read, fd is left untouched instead of writing back a flag
 * word built from the -1 failure value.
 */
static void set_close_on_exec(int fd)
{
	int v;

	v = fcntl(fd, F_GETFD, 0);
	if (v == -1) {
		return;
	}
	fcntl(fd, F_SETFD, v | FD_CLOEXEC);
}
/*
 * Callback passed to ctdb_getpnn_send() by ctdb_connect().
 * ctdb_getpnn_recv() writes the result straight into ctdb->pnn; a
 * non-zero return selects the failure message below.  The request is
 * released with ctdb_request_free().
 */
71 static void set_pnn(struct ctdb_connection *ctdb,
72 struct ctdb_request *req,
75 if (ctdb_getpnn_recv(ctdb, req, &ctdb->pnn) != 0) {
77 "ctdb_connect(async): failed to get pnn");
80 ctdb_request_free(ctdb, req);
83 struct ctdb_connection *ctdb_connect(const char *addr,
84 ctdb_log_fn_t log_fn, void *log_priv)
86 struct ctdb_connection *ctdb;
87 struct sockaddr_un sun;
89 ctdb = malloc(sizeof(*ctdb));
91 /* With no format string, we hope it doesn't use ap! */
93 memset(&ap, 0, sizeof(ap));
95 log_fn(log_priv, LOG_ERR, "ctdb_connect: no memory", ap);
101 ctdb->message_handlers = NULL;
103 ctdb->broken = false;
105 ctdb->log_priv = log_priv;
107 memset(&sun, 0, sizeof(sun));
108 sun.sun_family = AF_UNIX;
111 strncpy(sun.sun_path, addr, sizeof(sun.sun_path));
112 ctdb->fd = socket(AF_UNIX, SOCK_STREAM, 0);
116 set_nonblocking(ctdb->fd);
117 set_close_on_exec(ctdb->fd);
119 if (connect(ctdb->fd, (struct sockaddr *)&sun, sizeof(sun)) == -1)
122 /* Immediately queue a request to get our pnn. */
123 if (!ctdb_getpnn_send(ctdb, CTDB_CURRENT_NODE, set_pnn, NULL))
129 close_noerr(ctdb->fd);
/* Accessor on a connection returning an int.
 * NOTE(review): body not visible in this view; presumably returns
 * ctdb->fd for use with poll - confirm. */
136 int ctdb_get_fd(struct ctdb_connection *ctdb)
/* Returns an int computed from the connection.
 * NOTE(review): body not visible in this view; presumably the poll event
 * mask (POLLIN, POLLOUT) to pass on to ctdb_service() - confirm. */
141 int ctdb_which_events(struct ctdb_connection *ctdb)
/*
 * Allocate a request plus an io_elem sized by len for its outgoing
 * packet.  req->hdr.hdr is pointed at the io_elem data, cbdata is kept
 * in req->priv_data and no extra destructor is installed.
 * NOTE(review): the allocation-failure handling and the storing of cb
 * are not visible in this view.
 */
150 struct ctdb_request *new_ctdb_request(size_t len,
151 ctdb_callback_t cb, void *cbdata)
153 struct ctdb_request *req = malloc(sizeof(*req));
156 req->io = new_io_elem(len);
161 req->hdr.hdr = io_elem_data(req->io, NULL);
164 req->priv_data = cbdata;
166 req->extra_destructor = NULL;
/*
 * Release a request.  If an extra_destructor is set it runs first, with
 * the connection and the request; free_io_elem() is then applied to the
 * reply and to the outgoing io_elem.
 */
170 void ctdb_request_free(struct ctdb_connection *ctdb, struct ctdb_request *req)
172 if (req->extra_destructor) {
173 req->extra_destructor(ctdb, req);
176 free_io_elem(req->reply);
178 free_io_elem(req->io);
182 /* Sanity-checking wrapper for reply. */
/*
 * Checks, in order: the request was sent as CTDB_REQ_CALL, it carried
 * the expected callid, and the reply is at least sizeof(*inhdr) long
 * with operation CTDB_REPLY_CALL.  Each failed check has its own log
 * message.
 */
183 static struct ctdb_reply_call *unpack_reply_call(struct ctdb_connection *ctdb,
184 struct ctdb_request *req,
188 struct ctdb_reply_call *inhdr = io_elem_data(req->reply, &len);
190 /* Library user error if this isn't a reply to a call. */
191 if (req->hdr.hdr->operation != CTDB_REQ_CALL) {
194 "This was not a ctdbd call request: operation %u",
195 req->hdr.hdr->operation);
199 if (req->hdr.call->callid != callid) {
202 "This was not a ctdbd %u call request: %u",
203 callid, req->hdr.call->callid);
207 /* ctdbd or our error if this isn't a reply call. */
208 if (len < sizeof(*inhdr) || inhdr->hdr.operation != CTDB_REPLY_CALL) {
210 DEBUG(ctdb, LOG_CRIT,
211 "Invalid ctdbd call reply: len %zu, operation %u",
212 len, inhdr->hdr.operation);
219 /* Sanity-checking wrapper for reply. */
/*
 * Checks, in order: the reply is at least sizeof(*inhdr) long, the
 * request was sent as CTDB_REQ_CONTROL, its opcode equals control, and
 * the reply operation is CTDB_REPLY_CONTROL.  Each failed check has its
 * own log message.
 */
220 struct ctdb_reply_control *unpack_reply_control(struct ctdb_connection *ctdb,
221 struct ctdb_request *req,
222 enum ctdb_controls control)
225 struct ctdb_reply_control *inhdr = io_elem_data(req->reply, &len);
227 /* Reply too short to hold a control reply header. */
228 if (len < sizeof(*inhdr)) {
230 DEBUG(ctdb, LOG_CRIT,
231 "Short ctdbd control reply: %zu bytes", len);
/* Library user error if the request was not a control. */
234 if (req->hdr.hdr->operation != CTDB_REQ_CONTROL) {
237 "This was not a ctdbd control request: operation %u",
238 req->hdr.hdr->operation);
242 /* ... or if it was a different control from what we expected. */
243 if (req->hdr.control->opcode != control) {
246 "This was not an opcode %u ctdbd control request: %u",
247 control, req->hdr.control->opcode);
251 /* ctdbd or our error if this isn't a control reply. */
252 if (inhdr->hdr.operation != CTDB_REPLY_CONTROL) {
254 DEBUG(ctdb, LOG_CRIT,
255 "Invalid ctdbd control reply: operation %u",
256 inhdr->hdr.operation);
/*
 * Dispatch one complete incoming packet.  CTDB_REQ_MESSAGE packets are
 * handed to deliver_message().  Anything else is matched by reqid
 * against the requests on ctdb->doneq; the match is unlinked from doneq
 * and its callback is called with the request and its priv_data.  A
 * LOG_WARNING reports an unexpected reply.
 */
263 static void handle_incoming(struct ctdb_connection *ctdb, struct io_elem *in)
265 struct ctdb_req_header *hdr;
267 struct ctdb_request *i;
269 hdr = io_elem_data(in, &len);
270 /* FIXME: use len to check packet! */
272 if (hdr->operation == CTDB_REQ_MESSAGE) {
273 deliver_message(ctdb, hdr);
277 for (i = ctdb->doneq; i; i = i->next) {
278 if (i->hdr.hdr->reqid == hdr->reqid) {
279 DLIST_REMOVE(ctdb->doneq, i);
281 i->callback(ctdb, i, i->priv_data);
285 DEBUG(ctdb, LOG_WARNING,
286 "Unexpected ctdbd request reply: operation %u reqid %u",
287 hdr->operation, hdr->reqid);
/* Remove "harmless" errors. */
/*
 * Filter the result of a read or write on the non-blocking socket.
 *
 * ret - return value of the I/O call; errno must still be the value
 *       that call left behind.
 *
 * Returns 0 when the call failed only because it was interrupted or
 * would have blocked, and ret unchanged otherwise, so callers can treat
 * a negative result as a genuine error.  EAGAIN is tested next to
 * EWOULDBLOCK because POSIX allows the two to be different values.
 */
static ssize_t real_error(ssize_t ret)
{
	if (ret < 0 && (errno == EINTR || errno == EWOULDBLOCK
			|| errno == EAGAIN)) {
		return 0;
	}
	return ret;
}
/*
 * Drive I/O on the connection for the poll events given in revents.
 * POLLOUT: write the io_elem of the request at the head of ctdb->outq;
 * once io_elem_finished() reports it done, the request is moved from
 * outq to doneq.
 * POLLIN: read into ctdb->in, which starts out sized for a struct
 * ctdb_req_header, and hand each finished packet to handle_incoming().
 * Results of the write and read calls pass through real_error() before
 * being tested for failure; a read result of 0 is handled together with
 * read errors.
 */
299 int ctdb_service(struct ctdb_connection *ctdb, int revents)
305 if (revents & POLLOUT) {
307 if (real_error(write_io_elem(ctdb->fd,
308 ctdb->outq->io)) < 0) {
310 "ctdb_service: error writing to ctdbd");
314 if (io_elem_finished(ctdb->outq->io)) {
315 struct ctdb_request *done = ctdb->outq;
316 DLIST_REMOVE(ctdb->outq, done);
317 /* We add at the head: any dead ones
319 DLIST_ADD(ctdb->doneq, done);
324 while (revents & POLLIN) {
328 ctdb->in = new_io_elem(sizeof(struct ctdb_req_header));
331 "ctdb_service: allocating readbuf");
337 ret = read_io_elem(ctdb->fd, ctdb->in);
338 if (real_error(ret) < 0 || ret == 0) {
339 /* They closed fd? */
343 "ctdb_service: error reading from ctdbd");
346 } else if (ret < 0) {
347 /* No progress, stop loop. */
349 } else if (io_elem_finished(ctdb->in)) {
350 handle_incoming(ctdb, ctdb->in);
358 /* This is inefficient. We could pull in idtree.c. */
/*
 * Walks ctdb->outq and then ctdb->doneq looking for a request whose
 * header reqid equals reqid.  Both walks are linear list scans.
 */
359 static bool reqid_used(const struct ctdb_connection *ctdb, uint32_t reqid)
361 struct ctdb_request *i;
363 for (i = ctdb->outq; i; i = i->next) {
364 if (i->hdr.hdr->reqid == reqid) {
368 for (i = ctdb->doneq; i; i = i->next) {
369 if (i->hdr.hdr->reqid == reqid) {
/*
 * Hand out a request id.  Loops for as long as reqid_used() reports
 * ctdb->next_id as taken, then returns ctdb->next_id and post-increments
 * it.
 */
376 uint32_t new_reqid(struct ctdb_connection *ctdb)
378 while (reqid_used(ctdb, ctdb->next_id)) {
381 return ctdb->next_id++;
/*
 * Build a CTDB_REQ_CONTROL request and put it on ctdb->outq.
 * The packet is offsetof(struct ctdb_req_control, data) + extra long;
 * its header gets destnode and a fresh id from new_reqid(), and extra
 * bytes are copied from extra_data into pkt->data.  callback and cbdata
 * are passed on to new_ctdb_request().
 */
384 struct ctdb_request *new_ctdb_control_request(struct ctdb_connection *ctdb,
387 const void *extra_data,
389 ctdb_callback_t callback,
392 struct ctdb_request *req;
393 struct ctdb_req_control *pkt;
395 req = new_ctdb_request(offsetof(struct ctdb_req_control, data) + extra, callback, cbdata);
399 io_elem_init_req_header(req->io,
400 CTDB_REQ_CONTROL, destnode, new_reqid(ctdb));
402 pkt = req->hdr.control;
404 pkt->opcode = opcode;
408 pkt->datalen = extra;
409 memcpy(pkt->data, extra_data, extra);
410 DLIST_ADD(ctdb->outq, req);
/*
 * Callback that does nothing but free the request.  ctdb_cancel()
 * installs it in place of the original callback.
 */
414 void ctdb_cancel_callback(struct ctdb_connection *ctdb,
415 struct ctdb_request *req,
418 ctdb_request_free(ctdb, req);
/*
 * Cancel a request.  Logs the request pointer and its id (0 when the
 * header pointer is NULL) and swaps the callback for
 * ctdb_cancel_callback, which frees the request when it is called.
 */
421 int ctdb_cancel(struct ctdb_connection *ctdb, struct ctdb_request *req)
423 DEBUG(ctdb, LOG_DEBUG, "ctdb_cancel: %p (id %u)",
424 req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
426 /* FIXME: If it's not sent, we could just free it right now. */
427 req->callback = ctdb_cancel_callback;
/*
 * Fields of a struct whose opening line is not visible in this view;
 * apparently struct ctdb_db, going by the db->ctdb, db->tdb and
 * db->callback uses in the attachdb functions.
 */
/* Connection this database belongs to. */
432 struct ctdb_connection *ctdb;
/* Local tdb handle; assigned from tdb_open() in ctdb_attachdb_recv(). */
436 struct tdb_context *tdb;
/* Callback supplied to ctdb_attachdb_send(). */
438 ctdb_callback_t callback;
/*
 * Callback given to the GETDBPATH control request created in
 * attachdb_done().  It calls the callback stored in db, passing
 * req->extra as the request together with db->private_data.
 */
442 static void attachdb_getdbpath_done(struct ctdb_connection *ctdb,
443 struct ctdb_request *req,
446 struct ctdb_db *db = _db;
448 /* Do callback on original request. */
449 db->callback(ctdb, req->extra, db->private_data);
/*
 * Finish an attach started by ctdb_attachdb_send().
 * The GETDBPATH request is taken from req->extra and the unfinished db
 * from req->priv_data.  The GETDBPATH reply is validated with
 * unpack_reply_control(), a non-zero reply status is logged, and the
 * path in reply->data is opened with tdb_open().  Once that is done
 * req->priv_data is cleared, which detaches the db from the request.
 * NOTE(review): tdb_flags is initialised from db->tdb_flags but is
 * overwritten before its only use, so the caller flags do not reach
 * tdb_open() here - confirm that this is intended.
 */
452 struct ctdb_db *ctdb_attachdb_recv(struct ctdb_connection *ctdb,
453 struct ctdb_request *req)
455 struct ctdb_request *dbpath_req = req->extra;
456 struct ctdb_reply_control *reply;
457 struct ctdb_db *db = req->priv_data;
458 uint32_t tdb_flags = db->tdb_flags;
460 /* Never sent the dbpath request? We've failed. */
462 /* FIXME: Save errno? */
467 reply = unpack_reply_control(ctdb, dbpath_req, CTDB_CONTROL_GETDBPATH);
471 if (reply->status != 0) {
472 DEBUG(db->ctdb, LOG_ERR,
473 "ctdb_attachdb_recv: reply status %i", reply->status);
/* Persistent databases use TDB_DEFAULT, all others TDB_NOSYNC. */
477 tdb_flags = db->persistent ? TDB_DEFAULT : TDB_NOSYNC;
478 tdb_flags |= TDB_DISALLOW_NESTING;
480 /* FIXME: Setup logging to go through our logging. */
481 db->tdb = tdb_open((char *)reply->data, 0, tdb_flags, O_RDWR, 0);
482 if (db->tdb == NULL) {
483 DEBUG(db->ctdb, LOG_ERR,
484 "ctdb_attachdb_recv: failed to tdb_open %s",
485 (char *)reply->data);
489 /* Finally, separate the db from the request (see destroy_req_db). */
490 req->priv_data = NULL;
491 DEBUG(db->ctdb, LOG_DEBUG,
492 "ctdb_attachdb_recv: db %p, tdb %s", db, (char *)reply->data);
/*
 * Callback for the DB_ATTACH control sent by ctdb_attachdb_send().
 * The expected control is CTDB_CONTROL_DB_ATTACH_PERSISTENT for a
 * persistent db and CTDB_CONTROL_DB_ATTACH otherwise.  On a missing or
 * failed reply the user callback is called with the original request.
 * Otherwise the db id is read from the start of reply->data and a
 * GETDBPATH control carrying that id is created, with
 * attachdb_getdbpath_done as its callback.
 */
496 static void attachdb_done(struct ctdb_connection *ctdb,
497 struct ctdb_request *req,
500 struct ctdb_db *db = _db;
501 struct ctdb_request *req2;
502 struct ctdb_reply_control *reply;
503 enum ctdb_controls control = CTDB_CONTROL_DB_ATTACH;
505 if (db->persistent) {
506 control = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
509 reply = unpack_reply_control(ctdb, req, control);
510 if (!reply || reply->status != 0) {
513 "ctdb_attachdb_send(async): DB_ATTACH status %i",
516 /* We failed. Hand request to user and have them discover it
517 * via ctdb_attachdb_recv. */
518 db->callback(ctdb, req, db->private_data);
521 db->id = *(uint32_t *)reply->data;
523 /* Now we do another call, to get the dbpath. */
524 req2 = new_ctdb_control_request(db->ctdb, CTDB_CONTROL_GETDBPATH,
526 &db->id, sizeof(db->id),
527 attachdb_getdbpath_done, db);
529 DEBUG(db->ctdb, LOG_ERR,
530 "ctdb_attachdb_send(async): failed to allocate");
531 db->callback(ctdb, req, db->private_data);
536 DEBUG(db->ctdb, LOG_DEBUG,
537 "ctdb_attachdb_send(async): created getdbpath request");
/*
 * Extra destructor installed on the attach request by
 * ctdb_attachdb_send().  Frees whatever req->priv_data still points to
 * (ctdb_attachdb_recv() sets it to NULL once it has opened the db) and
 * frees the chained request kept in req->extra.
 */
540 static void destroy_req_db(struct ctdb_connection *ctdb,
541 struct ctdb_request *req)
543 /* Incomplete db is in priv_data. */
544 free(req->priv_data);
545 /* second request is chained off this one. */
547 ctdb_request_free(ctdb, req->extra);
/*
 * Start attaching to the database called name.
 * A struct ctdb_db is allocated and a DB_ATTACH control (the PERSISTENT
 * variant or the plain one) is created for CTDB_CURRENT_NODE with the
 * NUL-terminated name as payload and attachdb_done as callback.  The
 * caller flags, persistence setting, callback and private_data are
 * saved in the db; tdb_flags is also sent in the srvid field of the
 * control.  destroy_req_db is installed as the extra destructor of the
 * request.
 */
551 struct ctdb_request *
552 ctdb_attachdb_send(struct ctdb_connection *ctdb,
553 const char *name, int persistent, uint32_t tdb_flags,
554 ctdb_callback_t callback, void *private_data)
556 struct ctdb_request *req;
560 /* FIXME: Search if db already open. */
561 db = malloc(sizeof(*db));
567 opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
569 opcode = CTDB_CONTROL_DB_ATTACH;
572 req = new_ctdb_control_request(ctdb, opcode, CTDB_CURRENT_NODE, name,
573 strlen(name) + 1, attachdb_done, db);
575 DEBUG(db->ctdb, LOG_ERR,
576 "ctdb_attachdb_send: failed allocating DB_ATTACH");
582 db->tdb_flags = tdb_flags;
583 db->persistent = persistent;
584 db->callback = callback;
585 db->private_data = private_data;
587 req->extra_destructor = destroy_req_db;
588 /* This is set non-NULL when we succeed, see ctdb_attachdb_recv */
591 /* Flags get overloaded into srvid. */
592 req->hdr.control->srvid = tdb_flags;
593 DEBUG(db->ctdb, LOG_DEBUG,
594 "ctdb_attachdb_send: DB_ATTACH request %p", req);
/*
 * Fields of a struct whose opening line is not visible in this view;
 * apparently struct ctdb_lock, going by the lock->ctdb_db, lock->hdr
 * and lock->callback uses further down.
 */
/* Database the locked record lives in. */
599 struct ctdb_db *ctdb_db;
602 /* This will always be true by the time user sees this. */
/* Record header; ctdb_writerecord() passes it to ctdb_local_store(). */
604 struct ctdb_ltdb_header *hdr;
606 /* For convenience, we stash original callback here. */
607 ctdb_rrl_callback_t callback;
/*
 * Drop the tdb chain lock on the key of this lock, using the tdb of the
 * owning ctdb_db.
 * NOTE(review): the debug message is prefixed with ctdb_attachdb_send
 * although it is emitted from ctdb_release_lock; it looks like a
 * copy-and-paste leftover.
 */
610 void ctdb_release_lock(struct ctdb_lock *lock)
613 DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
614 "ctdb_attachdb_send: ctdb_release_lock %p", lock);
615 tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
/*
 * Dispose of a lock object.  It calls ctdb_release_lock() itself for
 * the case noted below, where the callback never released the lock.
 * NOTE(review): the statements that free the memory are not visible in
 * this view.
 */
620 static void ctdb_free_lock(struct ctdb_lock *lock)
623 /* FIXME: report error. Callback never released the lock */
624 ctdb_release_lock(lock);
631 /* We keep the lock if local node is the dmaster. */
/*
 * Take the tdb chain lock for lock->key and fetch the record with
 * ctdb_local_fetch(), which fills *data.  If a header came back and its
 * dmaster equals the pnn of this connection, the got-local-lock path is
 * taken; the tdb_chainunlock() call further down is for the other case.
 * A chainlock failure is logged at LOG_WARNING.
 */
632 static bool try_readrecordlock(struct ctdb_lock *lock, TDB_DATA *data)
634 struct ctdb_ltdb_header *hdr;
636 if (tdb_chainlock(lock->ctdb_db->tdb, lock->key) != 0) {
637 DEBUG(lock->ctdb_db->ctdb, LOG_WARNING,
638 "ctdb_readrecordlock_async: failed to chainlock");
642 hdr = ctdb_local_fetch(lock->ctdb_db->tdb, lock->key, data);
643 if (hdr && hdr->dmaster == lock->ctdb_db->ctdb->pnn) {
644 DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
645 "ctdb_readrecordlock_async: got local lock");
651 tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
656 /* If they shutdown before we hand them the lock, we free it here. */
/*
 * Extra destructor installed on the request by
 * ctdb_readrecordlock_async().  The lock is kept in req->extra; it is
 * released and then freed, so the lock must not be touched after
 * ctdb_request_free() has run on such a request.
 */
657 static void destroy_lock(struct ctdb_connection *ctdb,
658 struct ctdb_request *req)
660 ctdb_release_lock(req->extra);
661 ctdb_free_lock(req->extra);
664 static void readrecordlock_retry(struct ctdb_connection *ctdb,
665 struct ctdb_request *req, void *private)
667 struct ctdb_lock *lock = req->extra;
668 struct ctdb_reply_call *reply;
671 /* OK, we've received reply to noop migration */
672 reply = unpack_reply_call(ctdb, req, CTDB_NULL_FUNC);
673 if (!reply || reply->status != 0) {
676 "ctdb_readrecordlock_async(async):"
677 " NULL_FUNC returned %i", reply->status);
679 lock->callback(lock->ctdb_db, NULL, tdb_null, private);
680 ctdb_request_free(ctdb, req); /* Also frees lock. */
681 ctdb_free_lock(lock);
685 /* Can we get lock now? */
686 if (try_readrecordlock(lock, &data)) {
687 /* Now it's their responsibility to free lock & request! */
688 req->extra_destructor = NULL;
689 lock->callback(lock->ctdb_db, lock, data, private);
690 ctdb_free_lock(lock);
694 /* Retransmit the same request again (we lost race). */
695 io_elem_reset(req->io);
696 DLIST_ADD(ctdb->outq, req);
/*
 * Obtain the record lock for key and deliver it through callback.
 * A ctdb_lock is allocated with room for a private copy of the key
 * right behind it.  Fast path: if try_readrecordlock() succeeds at
 * once, callback is called directly and the lock is freed afterwards.
 * Slow path: a CTDB_REQ_CALL for CTDB_NULL_FUNC with
 * CTDB_IMMEDIATE_MIGRATION set is queued on outq, carrying the key;
 * readrecordlock_retry handles the reply, destroy_lock is the extra
 * destructor and the user callback is saved in the lock.
 * NOTE(review): the line holding the return type is not visible in
 * this view.
 */
700 ctdb_readrecordlock_async(struct ctdb_db *ctdb_db, TDB_DATA key,
701 ctdb_rrl_callback_t callback, void *cbdata)
703 struct ctdb_request *req;
704 struct ctdb_lock *lock;
708 lock = malloc(sizeof(*lock) + key.dsize);
710 DEBUG(ctdb_db->ctdb, LOG_ERR,
711 "ctdb_readrecordlock_async: lock allocation failed");
/* The key copy lives in the same allocation, right after the struct. */
714 lock->key.dptr = (void *)(lock + 1);
715 memcpy(lock->key.dptr, key.dptr, key.dsize);
716 lock->key.dsize = key.dsize;
717 lock->ctdb_db = ctdb_db;
722 if (try_readrecordlock(lock, &data)) {
723 callback(ctdb_db, lock, data, cbdata);
724 ctdb_free_lock(lock);
728 /* Slow path: create request. */
729 req = new_ctdb_request(offsetof(struct ctdb_req_call, data)
730 + key.dsize, readrecordlock_retry, cbdata);
732 DEBUG(ctdb_db->ctdb, LOG_ERR,
733 "ctdb_readrecordlock_async: allocation failed");
734 ctdb_release_lock(lock);
735 ctdb_free_lock(lock);
739 req->extra_destructor = destroy_lock;
740 /* We store the original callback in the lock, and use our own. */
741 lock->callback = callback;
743 io_elem_init_req_header(req->io, CTDB_REQ_CALL, CTDB_CURRENT_NODE,
744 new_reqid(ctdb_db->ctdb));
746 req->hdr.call->flags = CTDB_IMMEDIATE_MIGRATION;
747 req->hdr.call->db_id = ctdb_db->id;
748 req->hdr.call->callid = CTDB_NULL_FUNC;
749 req->hdr.call->hopcount = 0;
750 req->hdr.call->keylen = key.dsize;
751 req->hdr.call->calldatalen = 0;
752 memcpy(req->hdr.call->data, key.dptr, key.dsize);
753 DLIST_ADD(ctdb_db->ctdb->outq, req);
757 int ctdb_writerecord(struct ctdb_lock *lock, TDB_DATA data)
759 if (lock->ctdb_db->persistent) {
761 DEBUG(lock->ctdb_db->ctdb, LOG_ERR,
762 "ctdb_writerecord: cannot write to persistent db");
767 /* FIXME: Report error. */
771 return ctdb_local_store(lock->ctdb_db->tdb, lock->key, lock->hdr,