4 Copyright (C) Rusty Russell 2010
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
25 #include <sys/socket.h>
27 #include "libctdb_private.h"
29 #include "local_tdb.h"
31 #include <dlinklist.h>
32 #include <ctdb_protocol.h>
34 /* Remove type-safety macros. */
/* Both names are defined as real functions further down in this file, so
 * any macro of the same name has to be gone before those definitions. */
35 #undef ctdb_attachdb_send
36 #undef ctdb_readrecordlock_send
38 /* FIXME: Could be in shared util code with rest of ctdb */
/* Helper taking a file descriptor; ctdb_connect() calls it on ctdb->fd.
 * NOTE(review): presumably closes fd without disturbing errno -- the body
 * is not visible here, confirm. */
39 static void close_noerr(int fd)
46 /* FIXME: Could be in shared util code with rest of ctdb */
/* NOTE(review): presumably frees p without disturbing errno -- the body is
 * not visible here, confirm. */
47 static void free_noerr(void *p)
54 /* FIXME: Could be in shared util code with rest of ctdb */
/* Add O_NONBLOCK to the file status flags of fd, keeping whatever flags
 * F_GETFL reported. */
55 static void set_nonblocking(int fd)
58 v = fcntl(fd, F_GETFL, 0);
/* NOTE(review): neither fcntl() result is checked; a failed F_GETFL yields
 * -1, which would then be OR-ed into the flags passed to F_SETFL. */
59 fcntl(fd, F_SETFL, v | O_NONBLOCK);
62 /* FIXME: Could be in shared util code with rest of ctdb */
/* Add FD_CLOEXEC to the descriptor flags of fd, keeping whatever flags
 * F_GETFD reported. */
63 static void set_close_on_exec(int fd)
66 v = fcntl(fd, F_GETFD, 0);
/* NOTE(review): neither fcntl() result is checked; a failed F_GETFD yields
 * -1, which would then be OR-ed into the flags passed to F_SETFD. */
67 fcntl(fd, F_SETFD, v | FD_CLOEXEC);
/* Request callback handed to ctdb_getpnn_send() by ctdb_connect(): lets
 * ctdb_getpnn_recv() write the result straight into ctdb->pnn. */
70 static void set_pnn(struct ctdb_connection *ctdb,
71 struct ctdb_request *req,
/* A non-zero result from ctdb_getpnn_recv() is treated as the failure case. */
74 if (ctdb_getpnn_recv(req, &ctdb->pnn) != 0) {
75 /* FIXME: Report error. */
/* Allocate a struct ctdb_connection, open a non-blocking, close-on-exec
 * AF_UNIX stream socket, connect it to the path in addr and queue a request
 * for our own pnn.
 * NOTE(review): the error paths and return statements are not visible here;
 * presumably returns the connection on success and NULL on failure --
 * confirm. */
80 struct ctdb_connection *ctdb_connect(const char *addr)
82 struct ctdb_connection *ctdb;
83 struct sockaddr_un sun;
85 ctdb = malloc(sizeof(*ctdb));
/* The immediate queue and the message handler list start out empty. */
90 ctdb->immediateq = NULL;
92 ctdb->message_handlers = NULL;
94 memset(&sun, 0, sizeof(sun));
95 sun.sun_family = AF_UNIX;
/* NOTE(review): strncpy() does not NUL-terminate when addr is at least
 * sizeof(sun.sun_path) bytes long; the memset() above only helps for
 * shorter paths. No length check on addr is visible here. */
98 strncpy(sun.sun_path, addr, sizeof(sun.sun_path));
99 ctdb->fd = socket(AF_UNIX, SOCK_STREAM, 0);
/* The socket is already non-blocking when connect() is called below. */
103 set_nonblocking(ctdb->fd);
104 set_close_on_exec(ctdb->fd);
106 if (connect(ctdb->fd, (struct sockaddr *)&sun, sizeof(sun)) == -1)
109 /* Immediately queue a request to get our pnn. */
/* set_pnn is the callback that stores the answer in ctdb->pnn. */
110 if (!ctdb_getpnn_send(ctdb, CTDB_CURRENT_NODE, set_pnn, NULL))
/* NOTE(review): presumably part of the failure cleanup path -- the
 * surrounding labels are not visible here, confirm. */
116 close_noerr(ctdb->fd);
/* NOTE(review): presumably returns ctdb->fd so the caller can poll it --
 * the body is not visible here, confirm. */
123 int ctdb_get_fd(struct ctdb_connection *ctdb)
/* NOTE(review): presumably returns the poll event mask the caller should
 * wait for (ctdb_service() tests POLLIN and POLLOUT) -- the body is not
 * visible here, confirm. */
128 int ctdb_which_events(struct ctdb_connection *ctdb)
/* Allocate a request together with an io_elem of len bytes for the outgoing
 * packet. cbdata is stored as the request's priv_data and no extra
 * destructor is set.
 * NOTE(review): allocation-failure handling, the use of cb and the return
 * statement are not visible here. */
137 struct ctdb_request *new_ctdb_request(size_t len,
138 ctdb_callback_t cb, void *cbdata)
140 struct ctdb_request *req = malloc(sizeof(*req));
143 req->io = new_io_elem(len);
/* hdr is the data pointer that io_elem_data() reports for req->io. */
148 req->hdr.hdr = io_elem_data(req->io, NULL);
151 req->priv_data = cbdata;
153 req->extra_destructor = NULL;
/* Tear down a request: run its extra destructor when one is set, then free
 * the reply io_elem and the outgoing io_elem.
 * NOTE(review): any guard around freeing req->reply and the release of req
 * itself are not visible here. */
157 void ctdb_request_free(struct ctdb_request *req)
159 if (req->extra_destructor) {
160 req->extra_destructor(req);
163 free_io_elem(req->reply);
165 free_io_elem(req->io);
/* Check that the reply attached to req is a CTDB_REPLY_CALL of sufficient
 * length and that req itself was a CTDB_REQ_CALL with the expected callid.
 * NOTE(review): the comment opened on the next line is not closed in this
 * view (its last line appears to be missing), and the return statements are
 * not visible either. */
169 /* Sanity-checking wrapper for reply.
171 static struct ctdb_reply_call *unpack_reply_call(struct ctdb_request *req,
175 struct ctdb_reply_call *inhdr = io_elem_data(req->reply, &len);
177 /* ctdbd or our error if this isn't a reply call. */
178 if (len < sizeof(*inhdr) || inhdr->hdr.operation != CTDB_REPLY_CALL) {
183 /* Library user error if this isn't a reply to a call. */
184 if (req->hdr.hdr->operation != CTDB_REQ_CALL
185 || req->hdr.call->callid != callid) {
/* Check that req was a CTDB_REQ_CONTROL with the expected opcode and that
 * the reply attached to it is a CTDB_REPLY_CONTROL of sufficient length.
 * NOTE(review): the comment opened on the next line is not closed in this
 * view (its last line appears to be missing), and the return statements are
 * not visible either. */
193 /* Sanity-checking wrapper for reply.
195 struct ctdb_reply_control *unpack_reply_control(struct ctdb_request *req,
196 enum ctdb_controls control)
199 struct ctdb_reply_control *inhdr = io_elem_data(req->reply, &len);
201 /* Library user error if the request wasn't a control. */
202 if (len < sizeof(*inhdr)
203 || req->hdr.hdr->operation != CTDB_REQ_CONTROL) {
208 /* ... or if it was a different control from what we expected. */
209 if (req->hdr.control->opcode != control) {
214 /* ctdbd or our error if this isn't a reply control. */
215 if (inhdr->hdr.operation != CTDB_REPLY_CONTROL) {
/* Dispatch one incoming packet: a CTDB_REQ_MESSAGE goes to
 * deliver_message(); anything else is matched by reqid against the requests
 * waiting on ctdb->doneq. */
223 static void handle_incoming(struct ctdb_connection *ctdb, struct io_elem *in)
225 struct ctdb_req_header *hdr;
227 struct ctdb_request *i;
229 hdr = io_elem_data(in, &len);
230 /* FIXME: use len to check packet! */
232 if (hdr->operation == CTDB_REQ_MESSAGE) {
233 deliver_message(ctdb, hdr);
/* Find the already-sent request this packet answers. */
237 for (i = ctdb->doneq; i; i = i->next) {
238 if (i->hdr.hdr->reqid == hdr->reqid) {
/* Unlink the request from doneq before running its callback. */
239 DLIST_REMOVE(ctdb->doneq, i);
241 i->callback(ctdb, i, i->priv_data);
245 /* FIXME: report this error. */
249 /* Remove "harmless" errors. */
/* "Harmless" here means ret < 0 with errno set to EINTR or EWOULDBLOCK.
 * NOTE(review): the return statements are not visible; ctdb_service()
 * treats a result below zero from this function as a real failure. */
250 static ssize_t real_error(ssize_t ret)
252 if (ret < 0 && (errno == EINTR || errno == EWOULDBLOCK))
/* Perform the I/O indicated by the poll flags in revents.
 *  - POLLOUT: write the request at the head of outq; once its io_elem is
 *    fully written the request moves to the tail of doneq.
 *  - POLLIN: read into ctdb->in and pass finished packets to
 *    handle_incoming().
 *  - Finally, run the callback of every request on immediateq.
 * NOTE(review): the return values are not visible here. */
257 int ctdb_service(struct ctdb_connection *ctdb, int revents)
263 if (revents & POLLOUT) {
265 if (real_error(write_io_elem(ctdb->fd,
266 ctdb->outq->io)) < 0) {
270 if (io_elem_finished(ctdb->outq->io)) {
/* Fully sent: park the request on doneq, where handle_incoming() looks
 * for the request matching an incoming reqid. */
271 struct ctdb_request *done = ctdb->outq;
272 DLIST_REMOVE(ctdb->outq, done);
273 DLIST_ADD_END(ctdb->doneq, done,
274 struct ctdb_request);
279 while (revents & POLLIN) {
/* A fresh input element is sized for just the request header. */
283 ctdb->in = new_io_elem(sizeof(struct ctdb_req_header));
290 ret = read_io_elem(ctdb->fd, ctdb->in);
291 if (real_error(ret) < 0 || ret == 0) {
292 /* They closed fd? */
297 } else if (ret < 0) {
298 /* No progress, stop loop. */
300 } else if (io_elem_finished(ctdb->in)) {
301 handle_incoming(ctdb, ctdb->in);
306 while (ctdb->immediateq) {
307 struct ctdb_request *imm = ctdb->immediateq;
/* NOTE(review): the callback runs while imm is still linked on
 * immediateq; if a callback frees the request, the DLIST_REMOVE below
 * would touch freed memory -- confirm whether unlinking should come
 * first. */
308 imm->callback(ctdb, imm, imm->priv_data);
309 DLIST_REMOVE(ctdb->immediateq, imm);
315 /* This is inefficient. We could pull in idtree.c. */
/* Linear scan of outq, then doneq, for a request whose header already
 * carries reqid.
 * NOTE(review): the return statements are not visible here. */
316 static bool reqid_used(const struct ctdb_connection *ctdb, uint32_t reqid)
318 struct ctdb_request *i;
320 for (i = ctdb->outq; i; i = i->next) {
321 if (i->hdr.hdr->reqid == reqid) {
325 for (i = ctdb->doneq; i; i = i->next) {
326 if (i->hdr.hdr->reqid == reqid) {
/* Return ctdb->next_id as the id for a new request and advance the counter.
 * The loop keeps going while reqid_used() reports the candidate as taken.
 * NOTE(review): the loop body is not visible here; presumably it advances
 * next_id -- confirm. */
333 uint32_t new_reqid(struct ctdb_connection *ctdb)
335 while (reqid_used(ctdb, ctdb->next_id)) {
338 return ctdb->next_id++;
/* Build a CTDB_REQ_CONTROL packet for destnode with the given opcode and
 * `extra` bytes of payload copied from extra_data, then append the request
 * to ctdb->outq. callback and cbdata are passed on to new_ctdb_request().
 * NOTE(review): part of the parameter list, the allocation-failure handling
 * and the return statement are not visible here. */
341 struct ctdb_request *new_ctdb_control_request(struct ctdb_connection *ctdb,
344 const void *extra_data,
346 ctdb_callback_t callback,
349 struct ctdb_request *req;
350 struct ctdb_req_control *pkt;
/* Packet size is the control header plus the payload. */
352 req = new_ctdb_request(sizeof(*pkt) + extra, callback, cbdata);
/* Each control request gets a fresh reqid. */
356 io_elem_init_req_header(req->io,
357 CTDB_REQ_CONTROL, destnode, new_reqid(ctdb));
359 pkt = req->hdr.control;
360 pkt->opcode = opcode;
364 pkt->datalen = extra;
365 memcpy(pkt->data, extra_data, extra);
366 DLIST_ADD_END(ctdb->outq, req, struct ctdb_request);
/* Callback that simply frees the request it is invoked for; ctdb_cancel()
 * installs it in place of the caller's callback. */
370 void ctdb_cancel_callback(struct ctdb_connection *ctdb,
371 struct ctdb_request *req,
374 ctdb_request_free(req);
/* Cancel a request by replacing its callback with ctdb_cancel_callback,
 * which frees the request when it is eventually invoked.
 * NOTE(review): the return value is not visible here. */
377 int ctdb_cancel(struct ctdb_request *req)
379 /* FIXME: If it's not sent, we could just free it right now. */
380 req->callback = ctdb_cancel_callback;
/* Members that the code below reaches through struct ctdb_db pointers.
 * NOTE(review): the struct's opening line and its other members (id,
 * persistent, tdb_flags, private_data are used below) are not visible. */
/* Connection used to issue the follow-up GETDBPATH control. */
385 struct ctdb_connection *ctdb;
/* Handle returned by tdb_open() in ctdb_attachdb_recv(). */
389 struct tdb_context *tdb;
/* Caller's callback as saved by ctdb_attachdb_send(). */
391 ctdb_callback_t callback;
/* Callback for the GETDBPATH control queued by attachdb_done(): forwards
 * completion to the caller's callback stored in the db. */
395 static void attachdb_getdbpath_done(struct ctdb_connection *ctdb,
396 struct ctdb_request *req,
399 struct ctdb_db *db = _db;
401 /* Do callback on original request. */
/* The request handed to the callback is req->extra, not req itself. */
402 db->callback(ctdb, req->extra, db->private_data);
/* Complete an attach: validate the GETDBPATH reply held by the request in
 * req->extra, open the tdb at the path it carries and detach the db from
 * the request.
 * NOTE(review): the return statements are not visible here; presumably the
 * db on success and NULL on failure -- confirm. */
405 struct ctdb_db *ctdb_attachdb_recv(struct ctdb_request *req)
407 struct ctdb_request *dbpath_req = req->extra;
408 struct ctdb_reply_control *reply;
409 struct ctdb_db *db = req->priv_data;
/* NOTE(review): this initial value is replaced by the assignment further
 * down before any visible use, so db->tdb_flags appears to have no effect
 * here -- confirm. */
410 uint32_t tdb_flags = db->tdb_flags;
412 /* Never sent the dbpath request? We've failed. */
414 /* FIXME: Save errno? */
419 reply = unpack_reply_control(dbpath_req, CTDB_CONTROL_GETDBPATH);
420 if (!reply || reply->status != 0) {
/* Persistent databases use TDB_DEFAULT, others TDB_NOSYNC. */
424 tdb_flags = db->persistent ? TDB_DEFAULT : TDB_NOSYNC;
425 tdb_flags |= TDB_DISALLOW_NESTING;
/* The reply payload is used directly as the path string.
 * NOTE(review): no check that it is NUL-terminated is visible here. */
427 db->tdb = tdb_open((char *)reply->data, 0, tdb_flags, O_RDWR, 0);
428 if (db->tdb == NULL) {
432 /* Finally, separate the db from the request (see destroy_req_db). */
433 req->priv_data = NULL;
/* Callback for the attach control sent by ctdb_attachdb_send(). On a good
 * reply it records the database id and queues a GETDBPATH control; on a bad
 * reply it hands the request to the caller's callback. */
437 static void attachdb_done(struct ctdb_connection *ctdb,
438 struct ctdb_request *req,
441 struct ctdb_db *db = _db;
442 struct ctdb_request *req2;
443 struct ctdb_reply_control *reply;
444 enum ctdb_controls control = CTDB_CONTROL_DB_ATTACH;
/* Expect the same opcode that ctdb_attachdb_send() chose. */
446 if (db->persistent) {
447 control = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
450 reply = unpack_reply_control(req, control);
451 if (!reply || reply->status != 0) {
452 /* We failed. Hand request to user and have them discover it
453 * via ctdb_attachdb_recv. */
/* NOTE(review): db is passed as the callback data here, whereas
 * attachdb_getdbpath_done passes db->private_data -- confirm which one the
 * caller's callback is meant to receive. */
454 db->callback(ctdb, req, db);
/* The first four bytes of the reply payload are taken as the db id.
 * NOTE(review): no check of the payload length is visible here. */
457 db->id = *(uint32_t *)reply->data;
459 /* Now we do another call, to get the dbpath. */
460 req2 = new_ctdb_control_request(db->ctdb, CTDB_CONTROL_GETDBPATH,
462 &db->id, sizeof(db->id),
463 attachdb_getdbpath_done, db);
/* NOTE(review): presumably the path taken when req2 could not be created
 * -- the surrounding condition is not visible here, confirm. */
465 db->callback(ctdb, req, db);
/* extra_destructor installed by ctdb_attachdb_send(): frees the db still
 * attached to the request and the request chained in req->extra. */
472 static void destroy_req_db(struct ctdb_request *req)
474 /* Incomplete db is in priv_data. */
/* ctdb_attachdb_recv() sets priv_data to NULL once the db is handed over. */
475 free(req->priv_data);
476 /* second request is chained off this one. */
478 ctdb_request_free(req->extra);
/* Begin attaching to the database called name: allocate a struct ctdb_db,
 * queue an attach control to the current node with the name as payload, and
 * remember the caller's flags, callback and private_data in the db.
 * attachdb_done() handles the reply.
 * NOTE(review): the allocation-failure handling and the return statement
 * are not visible here. */
482 struct ctdb_request *
483 ctdb_attachdb_send(struct ctdb_connection *ctdb,
484 const char *name, int persistent, uint32_t tdb_flags,
485 ctdb_callback_t callback, void *private_data)
487 struct ctdb_request *req;
491 /* FIXME: Search if db already open. */
492 db = malloc(sizeof(*db));
/* Two alternative opcodes; attachdb_done() selects between the same two
 * based on db->persistent. */
498 opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
500 opcode = CTDB_CONTROL_DB_ATTACH;
/* strlen(name) + 1 sends the terminating NUL along with the name. */
503 req = new_ctdb_control_request(ctdb, opcode, CTDB_CURRENT_NODE, name,
504 strlen(name) + 1, attachdb_done, db);
511 db->tdb_flags = tdb_flags;
512 db->persistent = persistent;
513 db->callback = callback;
514 db->private_data = private_data;
/* Frees the db and any chained request if the request is freed early. */
516 req->extra_destructor = destroy_req_db;
517 /* This is set non-NULL when we succeed, see ctdb_attachdb_recv */
520 /* Flags get overloaded into srvid. */
521 req->hdr.control->srvid = tdb_flags;
/* Members that the code below reaches through struct ctdb_lock pointers.
 * NOTE(review): the struct's opening line and its other members (key and
 * data are used below) are not visible here. */
/* Database the locked record belongs to. */
526 struct ctdb_db *ctdb_db;
529 /* This will always be true by the time user sees this. */
/* NOTE(review): the comment above appears to describe a member that is
 * missing from this view rather than hdr -- confirm. */
/* Record header returned by ctdb_local_fetch(); freed in
 * ctdb_release_lock(). */
531 struct ctdb_ltdb_header *hdr;
534 /* For convenience, we stash original callback here. */
535 ctdb_callback_t callback;
/* Release a record lock: unlock the chain for lock->key in the database's
 * tdb and free the record header.
 * NOTE(review): any condition guarding the unlock and the release of the
 * lock structure itself are not visible here. */
538 void ctdb_release_lock(struct ctdb_lock *lock)
541 tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
543 free(lock->hdr); /* Also frees data */
547 /* We keep the lock if local node is the dmaster. */
/* Take the chain lock for lock->key, fetch the local record into lock->data
 * and compare the record's dmaster with this connection's pnn; the chain is
 * unlocked again further down.
 * NOTE(review): the return statements and what is stored into the lock on
 * success are not visible here. */
548 static bool try_readrecordlock(struct ctdb_lock *lock)
550 struct ctdb_ltdb_header *hdr;
552 if (tdb_chainlock(lock->ctdb_db->tdb, lock->key) != 0) {
556 hdr = ctdb_local_fetch(lock->ctdb_db->tdb, lock->key, &lock->data);
557 if (hdr && hdr->dmaster == lock->ctdb_db->ctdb->pnn) {
563 tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
568 /* If they cancel *before* we hand them the lock from
569 * ctdb_readrecordlock_recv, we free it here. */
/* extra_destructor installed by ctdb_readrecordlock_send(); the lock lives
 * in req->extra. */
570 static void destroy_lock(struct ctdb_request *req)
572 ctdb_release_lock(req->extra);
/* Hand the lock stored in req->extra over to the caller. Clearing the
 * request's extra_destructor means freeing the request afterwards no longer
 * releases the lock.
 * NOTE(review): the failure test and the return statements are not visible
 * here. */
575 struct ctdb_lock *ctdb_readrecordlock_recv(struct ctdb_db *ctdb_db,
576 struct ctdb_request *req,
579 struct ctdb_lock *lock = req->extra;
582 /* Something went wrong. */
586 /* Now it's their responsibility to free! */
587 req->extra_destructor = NULL;
/* Callback for the migration request sent by ctdb_readrecordlock_send().
 * A bad reply, or a successful lock attempt, is passed on to the caller's
 * original callback; otherwise the same request is queued again. */
592 static void readrecordlock_retry(struct ctdb_connection *ctdb,
593 struct ctdb_request *req, void *private)
595 struct ctdb_lock *lock = req->extra;
596 struct ctdb_reply_call *reply;
598 /* OK, we've received reply to noop migration */
599 reply = unpack_reply_call(req, CTDB_NULL_FUNC);
600 if (!reply || reply->status != 0) {
/* Failure: let the caller find out through its own callback. */
601 lock->callback(ctdb, req, private);
605 /* Can we get lock now? */
606 if (try_readrecordlock(lock)) {
607 lock->callback(ctdb, req, private);
611 /* Retransmit the same request again (we lost race). */
/* Reset the outgoing io_elem so the packet can be written again. */
612 io_elem_reset(req->io);
613 DLIST_ADD_END(ctdb->outq, req, struct ctdb_request);
/* Start acquiring a lock on the record for key in ctdb_db. If the lock can
 * be taken straight away the request goes on immediateq so that
 * ctdb_service() runs the callback; otherwise a CTDB_REQ_CALL with
 * CTDB_IMMEDIATE_MIGRATION is queued on outq and readrecordlock_retry()
 * handles the reply.
 * NOTE(review): the allocation-failure checks and the return statements are
 * not visible here. */
617 struct ctdb_request *
618 ctdb_readrecordlock_send(struct ctdb_db *ctdb_db, TDB_DATA key,
619 ctdb_callback_t callback, void *cbdata)
621 struct ctdb_request *req;
622 struct ctdb_lock *lock;
/* One allocation holds the lock structure followed by a copy of the key. */
625 lock = malloc(sizeof(*lock) + key.dsize);
629 lock->key.dptr = (void *)(lock + 1);
630 memcpy(lock->key.dptr, key.dptr, key.dsize);
631 lock->key.dsize = key.dsize;
632 lock->ctdb_db = ctdb_db;
636 /* Get ready in case we need to send a migrate request. */
/* Packet size is the call header plus the key bytes. */
637 req = new_ctdb_request(sizeof(*req->hdr.call)
638 + key.dsize, callback, cbdata);
/* NOTE(review): presumably the cleanup when the request could not be
 * allocated -- the surrounding condition is not visible here, confirm. */
640 ctdb_release_lock(lock);
/* From here on, freeing the request releases the lock via destroy_lock. */
644 req->extra_destructor = destroy_lock;
646 if (try_readrecordlock(lock)) {
647 /* Already got it: prepare for immediate callback. */
648 DLIST_ADD_END(ctdb_db->ctdb->immediateq,
649 req, struct ctdb_request);
653 /* We store the original callback in the lock, and use our own. */
654 lock->callback = callback;
655 req->callback = readrecordlock_retry;
657 io_elem_init_req_header(req->io, CTDB_REQ_CALL, CTDB_CURRENT_NODE,
658 new_reqid(ctdb_db->ctdb));
/* CTDB_NULL_FUNC is the callid that readrecordlock_retry() expects in the
 * reply check. */
660 req->hdr.call->flags = CTDB_IMMEDIATE_MIGRATION;
661 req->hdr.call->db_id = ctdb_db->id;
662 req->hdr.call->callid = CTDB_NULL_FUNC;
663 req->hdr.call->hopcount = 0;
664 req->hdr.call->keylen = key.dsize;
665 req->hdr.call->calldatalen = 0;
/* The key is the only payload; calldatalen above is zero. */
666 memcpy(req->hdr.call->data, key.dptr, key.dsize);
667 DLIST_ADD_END(ctdb_db->ctdb->outq, req, struct ctdb_request);
671 int ctdb_writerecord(struct ctdb_lock *lock, TDB_DATA data)
673 if (lock->ctdb_db->persistent) {
674 /* FIXME: Report error. */
678 return ctdb_local_store(lock->ctdb_db->tdb, lock->key, lock->hdr,