libctdb: check ctdb_request_free & ctdb_cancel used appropriately.
[sahlberg/ctdb.git] / libctdb / ctdb.c
1 /*
2    core of libctdb
3
4    Copyright (C) Rusty Russell 2010
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include <ctdb.h>
20 #include <poll.h>
21 #include <errno.h>
22 #include <unistd.h>
23 #include <fcntl.h>
24 #include <stdlib.h>
25 #include <sys/socket.h>
26 #include <sys/un.h>
27 #include "libctdb_private.h"
28 #include "io_elem.h"
29 #include "local_tdb.h"
30 #include "messages.h"
31 #include <dlinklist.h>
32 #include <ctdb_protocol.h>
33
34 /* Remove type-safety macros. */
35 #undef ctdb_attachdb_send
36 #undef ctdb_readrecordlock_async
37 #undef ctdb_connect
38
39 struct ctdb_lock {
40         struct ctdb_lock *next, *prev;
41
42         struct ctdb_db *ctdb_db;
43         TDB_DATA key;
44
45         /* This will always be set by the time user sees this. */
46         unsigned long held_magic;
47         struct ctdb_ltdb_header *hdr;
48
49         /* For convenience, we stash original callback here. */
50         ctdb_rrl_callback_t callback;
51 };
52
53 static void remove_lock(struct ctdb_connection *ctdb, struct ctdb_lock *lock)
54 {
55         DLIST_REMOVE(ctdb->locks, lock);
56 }
57
58 /* FIXME: for thread safety, need tid info too. */
59 static bool holding_lock(struct ctdb_connection *ctdb)
60 {
61         /* For the moment, you can't ever hold more than 1 lock. */
62         return (ctdb->locks != NULL);
63 }
64
65 static void add_lock(struct ctdb_connection *ctdb, struct ctdb_lock *lock)
66 {
67         DLIST_ADD(ctdb->locks, lock);
68 }
69
/* Close `fd` while leaving errno exactly as the caller had it.
 * FIXME: Could be in shared util code with rest of ctdb */
static void close_noerr(int fd)
{
        const int saved_errno = errno;

        close(fd);
        errno = saved_errno;
}
77
/* free() `p` while leaving errno exactly as the caller had it.
 * FIXME: Could be in shared util code with rest of ctdb */
static void free_noerr(void *p)
{
        const int saved_errno = errno;

        free(p);
        errno = saved_errno;
}
85
/* Add O_NONBLOCK to the file status flags of `fd`.
 * If the current flags cannot be read, the descriptor is left alone
 * rather than writing back a flag word derived from the -1 error return.
 * FIXME: Could be in shared util code with rest of ctdb */
static void set_nonblocking(int fd)
{
        int flags = fcntl(fd, F_GETFL, 0);

        if (flags == -1) {
                return;
        }
        fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
93
/* Add FD_CLOEXEC to the descriptor flags of `fd`.
 * If the current flags cannot be read, the descriptor is left alone
 * rather than writing back a flag word derived from the -1 error return.
 * FIXME: Could be in shared util code with rest of ctdb */
static void set_close_on_exec(int fd)
{
        int flags = fcntl(fd, F_GETFD, 0);

        if (flags == -1) {
                return;
        }
        fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
101
102 static void set_pnn(struct ctdb_connection *ctdb,
103                     struct ctdb_request *req,
104                     void *unused)
105 {
106         if (!ctdb_getpnn_recv(ctdb, req, &ctdb->pnn)) {
107                 DEBUG(ctdb, LOG_CRIT,
108                       "ctdb_connect(async): failed to get pnn");
109                 ctdb->broken = true;
110         }
111         ctdb_request_free(ctdb, req);
112 }
113
114 struct ctdb_connection *ctdb_connect(const char *addr,
115                                      ctdb_log_fn_t log_fn, void *log_priv)
116 {
117         struct ctdb_connection *ctdb;
118         struct sockaddr_un sun;
119
120         ctdb = malloc(sizeof(*ctdb));
121         if (!ctdb) {
122                 /* With no format string, we hope it doesn't use ap! */
123                 va_list ap;
124                 memset(&ap, 0, sizeof(ap));
125                 errno = ENOMEM;
126                 log_fn(log_priv, LOG_ERR, "ctdb_connect: no memory", ap);
127                 goto fail;
128         }
129         ctdb->outq = NULL;
130         ctdb->doneq = NULL;
131         ctdb->in = NULL;
132         ctdb->message_handlers = NULL;
133         ctdb->next_id = 0;
134         ctdb->broken = false;
135         ctdb->log = log_fn;
136         ctdb->log_priv = log_priv;
137         ctdb->locks = NULL;
138
139         memset(&sun, 0, sizeof(sun));
140         sun.sun_family = AF_UNIX;
141         if (!addr)
142                 addr = CTDB_PATH;
143         strncpy(sun.sun_path, addr, sizeof(sun.sun_path));
144         ctdb->fd = socket(AF_UNIX, SOCK_STREAM, 0);
145         if (ctdb->fd < 0)
146                 goto free_fail;
147
148         set_nonblocking(ctdb->fd);
149         set_close_on_exec(ctdb->fd);
150
151         if (connect(ctdb->fd, (struct sockaddr *)&sun, sizeof(sun)) == -1)
152                 goto close_fail;
153
154         /* Immediately queue a request to get our pnn. */
155         if (!ctdb_getpnn_send(ctdb, CTDB_CURRENT_NODE, set_pnn, NULL))
156                 goto close_fail;
157
158         return ctdb;
159
160 close_fail:
161         close_noerr(ctdb->fd);
162 free_fail:
163         free_noerr(ctdb);
164 fail:
165         return NULL;
166 }
167
168 int ctdb_get_fd(struct ctdb_connection *ctdb)
169 {
170         return ctdb->fd;
171 }
172
173 int ctdb_which_events(struct ctdb_connection *ctdb)
174 {
175         int events = POLLIN;
176
177         if (ctdb->outq)
178                 events |= POLLOUT;
179         return events;
180 }
181
182 struct ctdb_request *new_ctdb_request(size_t len,
183                                       ctdb_callback_t cb, void *cbdata)
184 {
185         struct ctdb_request *req = malloc(sizeof(*req));
186         if (!req)
187                 return NULL;
188         req->io = new_io_elem(len);
189         if (!req->io) {
190                 free(req);
191                 return NULL;
192         }
193         req->hdr.hdr = io_elem_data(req->io, NULL);
194         req->reply = NULL;
195         req->callback = cb;
196         req->priv_data = cbdata;
197         req->extra = NULL;
198         req->extra_destructor = NULL;
199         return req;
200 }
201
202 void ctdb_request_free(struct ctdb_connection *ctdb, struct ctdb_request *req)
203 {
204         if (req->next || req->prev) {
205                 DEBUG(ctdb, LOG_ALERT,
206                       "ctdb_request_free: request not complete! ctdb_cancel? %p (id %u)",
207                       req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
208                 ctdb_cancel(ctdb, req);
209                 return;
210         }
211         if (req->extra_destructor) {
212                 req->extra_destructor(ctdb, req);
213         }
214         if (req->reply) {
215                 free_io_elem(req->reply);
216         }
217         free_io_elem(req->io);
218         free(req);
219 }
220
221 /* Sanity-checking wrapper for reply. */
222 static struct ctdb_reply_call *unpack_reply_call(struct ctdb_connection *ctdb,
223                                                  struct ctdb_request *req,
224                                                  uint32_t callid)
225 {
226         size_t len;
227         struct ctdb_reply_call *inhdr = io_elem_data(req->reply, &len);
228
229         /* Library user error if this isn't a reply to a call. */
230         if (req->hdr.hdr->operation != CTDB_REQ_CALL) {
231                 errno = EINVAL;
232                 DEBUG(ctdb, LOG_ALERT,
233                       "This was not a ctdbd call request: operation %u",
234                       req->hdr.hdr->operation);
235                 return NULL;
236         }
237
238         if (req->hdr.call->callid != callid) {
239                 errno = EINVAL;
240                 DEBUG(ctdb, LOG_ALERT,
241                       "This was not a ctdbd %u call request: %u",
242                       callid, req->hdr.call->callid);
243                 return NULL;
244         }
245
246         /* ctdbd or our error if this isn't a reply call. */
247         if (len < sizeof(*inhdr) || inhdr->hdr.operation != CTDB_REPLY_CALL) {
248                 errno = EIO;
249                 DEBUG(ctdb, LOG_CRIT,
250                       "Invalid ctdbd call reply: len %zu, operation %u",
251                       len, inhdr->hdr.operation);
252                 return NULL;
253         }
254
255         return inhdr;
256 }
257
258 /* Sanity-checking wrapper for reply. */
259 struct ctdb_reply_control *unpack_reply_control(struct ctdb_connection *ctdb,
260                                                 struct ctdb_request *req,
261                                                 enum ctdb_controls control)
262 {
263         size_t len;
264         struct ctdb_reply_control *inhdr = io_elem_data(req->reply, &len);
265
266         /* Library user error if this isn't a reply to a call. */
267         if (len < sizeof(*inhdr)) {
268                 errno = EINVAL;
269                 DEBUG(ctdb, LOG_ALERT,
270                       "Short ctdbd control reply: %zu bytes", len);
271                 return NULL;
272         }
273         if (req->hdr.hdr->operation != CTDB_REQ_CONTROL) {
274                 errno = EINVAL;
275                 DEBUG(ctdb, LOG_ALERT,
276                       "This was not a ctdbd control request: operation %u",
277                       req->hdr.hdr->operation);
278                 return NULL;
279         }
280
281         /* ... or if it was a different control from what we expected. */
282         if (req->hdr.control->opcode != control) {
283                 errno = EINVAL;
284                 DEBUG(ctdb, LOG_ALERT,
285                       "This was not an opcode %u ctdbd control request: %u",
286                       control, req->hdr.control->opcode);
287                 return NULL;
288         }
289
290         /* ctdbd or our error if this isn't a reply call. */
291         if (inhdr->hdr.operation != CTDB_REPLY_CONTROL) {
292                 errno = EIO;
293                 DEBUG(ctdb, LOG_CRIT,
294                       "Invalid ctdbd control reply: operation %u",
295                       inhdr->hdr.operation);
296                 return NULL;
297         }
298
299         return inhdr;
300 }
301
302 static void handle_incoming(struct ctdb_connection *ctdb, struct io_elem *in)
303 {
304         struct ctdb_req_header *hdr;
305         size_t len;
306         struct ctdb_request *i;
307
308         hdr = io_elem_data(in, &len);
309         /* FIXME: use len to check packet! */
310
311         if (hdr->operation == CTDB_REQ_MESSAGE) {
312                 deliver_message(ctdb, hdr);
313                 return;
314         }
315
316         for (i = ctdb->doneq; i; i = i->next) {
317                 if (i->hdr.hdr->reqid == hdr->reqid) {
318                         DLIST_REMOVE(ctdb->doneq, i);
319                         i->reply = in;
320                         i->callback(ctdb, i, i->priv_data);
321                         return;
322                 }
323         }
324         DEBUG(ctdb, LOG_WARNING,
325               "Unexpected ctdbd request reply: operation %u reqid %u",
326               hdr->operation, hdr->reqid);
327         free_io_elem(in);
328 }
329
/* Filter out "harmless" I/O errors.
 * Given the return value of a read/write-style call, returns 0 when it
 * failed only because it was interrupted or would have blocked (errno
 * EINTR, EWOULDBLOCK or EAGAIN); otherwise returns `ret` unchanged.
 * EAGAIN is tested separately because POSIX allows it to have a
 * different value from EWOULDBLOCK. */
static ssize_t real_error(ssize_t ret)
{
        if (ret < 0
            && (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN))
                return 0;
        return ret;
}
337
338 bool ctdb_service(struct ctdb_connection *ctdb, int revents)
339 {
340         if (ctdb->broken) {
341                 return false;
342         }
343
344         if (holding_lock(ctdb)) {
345                 DEBUG(ctdb, LOG_ALERT, "Do not block while holding lock!");
346         }
347
348         if (revents & POLLOUT) {
349                 while (ctdb->outq) {
350                         if (real_error(write_io_elem(ctdb->fd,
351                                                      ctdb->outq->io)) < 0) {
352                                 DEBUG(ctdb, LOG_ERR,
353                                       "ctdb_service: error writing to ctdbd");
354                                 ctdb->broken = true;
355                                 return false;
356                         }
357                         if (io_elem_finished(ctdb->outq->io)) {
358                                 struct ctdb_request *done = ctdb->outq;
359                                 DLIST_REMOVE(ctdb->outq, done);
360                                 /* We add at the head: any dead ones
361                                  * sit and end. */
362                                 DLIST_ADD(ctdb->doneq, done);
363                         }
364                 }
365         }
366
367         while (revents & POLLIN) {
368                 int ret;
369
370                 if (!ctdb->in) {
371                         ctdb->in = new_io_elem(sizeof(struct ctdb_req_header));
372                         if (!ctdb->in) {
373                                 DEBUG(ctdb, LOG_ERR,
374                                       "ctdb_service: allocating readbuf");
375                                 ctdb->broken = true;
376                                 return false;
377                         }
378                 }
379
380                 ret = read_io_elem(ctdb->fd, ctdb->in);
381                 if (real_error(ret) < 0 || ret == 0) {
382                         /* They closed fd? */
383                         if (ret == 0)
384                                 errno = EBADF;
385                         DEBUG(ctdb, LOG_ERR,
386                               "ctdb_service: error reading from ctdbd");
387                         ctdb->broken = true;
388                         return false;
389                 } else if (ret < 0) {
390                         /* No progress, stop loop. */
391                         revents = 0;
392                 } else if (io_elem_finished(ctdb->in)) {
393                         handle_incoming(ctdb, ctdb->in);
394                         ctdb->in = NULL;
395                 }
396         }
397
398         return true;
399 }
400
401 /* This is inefficient.  We could pull in idtree.c. */
402 static bool reqid_used(const struct ctdb_connection *ctdb, uint32_t reqid)
403 {
404         struct ctdb_request *i;
405
406         for (i = ctdb->outq; i; i = i->next) {
407                 if (i->hdr.hdr->reqid == reqid) {
408                         return true;
409                 }
410         }
411         for (i = ctdb->doneq; i; i = i->next) {
412                 if (i->hdr.hdr->reqid == reqid) {
413                         return true;
414                 }
415         }
416         return false;
417 }
418
419 uint32_t new_reqid(struct ctdb_connection *ctdb)
420 {
421         while (reqid_used(ctdb, ctdb->next_id)) {
422                 ctdb->next_id++;
423         }
424         return ctdb->next_id++;
425 }
426
427 struct ctdb_request *new_ctdb_control_request(struct ctdb_connection *ctdb,
428                                               uint32_t opcode,
429                                               uint32_t destnode,
430                                               const void *extra_data,
431                                               size_t extra,
432                                               ctdb_callback_t callback,
433                                               void *cbdata)
434 {
435         struct ctdb_request *req;
436         struct ctdb_req_control *pkt;
437
438         req = new_ctdb_request(offsetof(struct ctdb_req_control, data) + extra, callback, cbdata);
439         if (!req)
440                 return NULL;
441
442         io_elem_init_req_header(req->io,
443                                 CTDB_REQ_CONTROL, destnode, new_reqid(ctdb));
444
445         pkt = req->hdr.control;
446         pkt->pad = 0;
447         pkt->opcode = opcode;
448         pkt->srvid = 0;
449         pkt->client_id = 0;
450         pkt->flags = 0;
451         pkt->datalen = extra;
452         memcpy(pkt->data, extra_data, extra);
453         DLIST_ADD(ctdb->outq, req);
454         return req;
455 }
456
/* Callback installed by ctdb_cancel(): when the request is eventually
 * called back, it is simply freed. */
void ctdb_cancel_callback(struct ctdb_connection *ctdb,
                          struct ctdb_request *req,
                          void *unused)
{
        (void)unused;
        ctdb_request_free(ctdb, req);
}
463
464 void ctdb_cancel(struct ctdb_connection *ctdb, struct ctdb_request *req)
465 {
466         if (!req->next && !req->prev) {
467                 DEBUG(ctdb, LOG_ALERT,
468                       "ctdb_cancel: request completed! ctdb_request_free? %p (id %u)",
469                       req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
470                 ctdb_request_free(ctdb, req);
471                 return;
472         }
473
474         DEBUG(ctdb, LOG_DEBUG, "ctdb_cancel: %p (id %u)",
475               req, req->hdr.hdr ? req->hdr.hdr->reqid : 0);
476
477         /* FIXME: If it's not sent, we could just free it right now. */
478         req->callback = ctdb_cancel_callback;
479 }
480
481 struct ctdb_db {
482         struct ctdb_connection *ctdb;
483         bool persistent;
484         uint32_t tdb_flags;
485         uint32_t id;
486         struct tdb_context *tdb;
487
488         ctdb_callback_t callback;
489         void *private_data;
490 };
491
492 static void attachdb_getdbpath_done(struct ctdb_connection *ctdb,
493                                     struct ctdb_request *req,
494                                     void *_db)
495 {
496         struct ctdb_db *db = _db;
497
498         /* Do callback on original request. */
499         db->callback(ctdb, req->extra, db->private_data);
500 }
501
502 struct ctdb_db *ctdb_attachdb_recv(struct ctdb_connection *ctdb,
503                                    struct ctdb_request *req)
504 {
505         struct ctdb_request *dbpath_req = req->extra;
506         struct ctdb_reply_control *reply;
507         struct ctdb_db *db = req->priv_data;
508         uint32_t tdb_flags = db->tdb_flags;
509         struct tdb_logging_context log;
510
511         /* Never sent the dbpath request?  We've failed. */
512         if (!dbpath_req) {
513                 /* FIXME: Save errno? */
514                 errno = EINVAL;
515                 return NULL;
516         }
517
518         reply = unpack_reply_control(ctdb, dbpath_req, CTDB_CONTROL_GETDBPATH);
519         if (!reply) {
520                 return NULL;
521         }
522         if (reply->status != 0) {
523                 DEBUG(db->ctdb, LOG_ERR,
524                       "ctdb_attachdb_recv: reply status %i", reply->status);
525                 return NULL;
526         }
527
528         tdb_flags = db->persistent ? TDB_DEFAULT : TDB_NOSYNC;
529         tdb_flags |= TDB_DISALLOW_NESTING;
530
531         log.log_fn = ctdb_tdb_log_bridge;
532         log.log_private = ctdb;
533         db->tdb = tdb_open_ex((char *)reply->data, 0, tdb_flags, O_RDWR, 0,
534                               &log, NULL);
535         if (db->tdb == NULL) {
536                 DEBUG(db->ctdb, LOG_ERR,
537                       "ctdb_attachdb_recv: failed to tdb_open %s",
538                       (char *)reply->data);
539                 return NULL;
540         }
541
542         /* Finally, separate the db from the request (see destroy_req_db). */
543         req->priv_data = NULL;
544         DEBUG(db->ctdb, LOG_DEBUG,
545               "ctdb_attachdb_recv: db %p, tdb %s", db, (char *)reply->data);
546         return db;
547 }
548
549 static void attachdb_done(struct ctdb_connection *ctdb,
550                           struct ctdb_request *req,
551                           void *_db)
552 {
553         struct ctdb_db *db = _db;
554         struct ctdb_request *req2;
555         struct ctdb_reply_control *reply;
556         enum ctdb_controls control = CTDB_CONTROL_DB_ATTACH;
557
558         if (db->persistent) {
559                 control = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
560         }
561
562         reply = unpack_reply_control(ctdb, req, control);
563         if (!reply || reply->status != 0) {
564                 if (reply) {
565                         DEBUG(ctdb, LOG_ERR,
566                               "ctdb_attachdb_send(async): DB_ATTACH status %i",
567                               reply->status);
568                 }
569                 /* We failed.  Hand request to user and have them discover it
570                  * via ctdb_attachdb_recv. */
571                 db->callback(ctdb, req, db->private_data);
572                 return;
573         }
574         db->id = *(uint32_t *)reply->data;
575
576         /* Now we do another call, to get the dbpath. */
577         req2 = new_ctdb_control_request(db->ctdb, CTDB_CONTROL_GETDBPATH,
578                                         CTDB_CURRENT_NODE,
579                                         &db->id, sizeof(db->id),
580                                         attachdb_getdbpath_done, db);
581         if (!req2) {
582                 DEBUG(db->ctdb, LOG_ERR,
583                       "ctdb_attachdb_send(async): failed to allocate");
584                 db->callback(ctdb, req, db->private_data);
585                 return;
586         }
587         req->extra = req2;
588         req2->extra = req;
589         DEBUG(db->ctdb, LOG_DEBUG,
590               "ctdb_attachdb_send(async): created getdbpath request");
591 }
592
593 static void destroy_req_db(struct ctdb_connection *ctdb,
594                            struct ctdb_request *req)
595 {
596         /* Incomplete db is in priv_data. */
597         free(req->priv_data);
598         /* second request is chained off this one. */
599         if (req->extra) {
600                 ctdb_request_free(ctdb, req->extra);
601         }
602 }
603
604 struct ctdb_request *
605 ctdb_attachdb_send(struct ctdb_connection *ctdb,
606                    const char *name, bool persistent, uint32_t tdb_flags,
607                    ctdb_callback_t callback, void *private_data)
608 {
609         struct ctdb_request *req;
610         struct ctdb_db *db;
611         uint32_t opcode;
612
613         /* FIXME: Search if db already open. */
614         db = malloc(sizeof(*db));
615         if (!db) {
616                 return NULL;
617         }
618
619         if (persistent) {
620                 opcode = CTDB_CONTROL_DB_ATTACH_PERSISTENT;
621         } else {
622                 opcode = CTDB_CONTROL_DB_ATTACH;
623         }
624
625         req = new_ctdb_control_request(ctdb, opcode, CTDB_CURRENT_NODE, name,
626                                        strlen(name) + 1, attachdb_done, db);
627         if (!req) {
628                 DEBUG(ctdb, LOG_ERR,
629                       "ctdb_attachdb_send: failed allocating DB_ATTACH");
630                 free(db);
631                 return NULL;
632         }
633
634         db->ctdb = ctdb;
635         db->tdb_flags = tdb_flags;
636         db->persistent = persistent;
637         db->callback = callback;
638         db->private_data = private_data;
639
640         req->extra_destructor = destroy_req_db;
641         /* This is set non-NULL when we succeed, see ctdb_attachdb_recv */
642         req->extra = NULL;
643
644         /* Flags get overloaded into srvid. */
645         req->hdr.control->srvid = tdb_flags;
646         DEBUG(db->ctdb, LOG_DEBUG,
647               "ctdb_attachdb_send: DB_ATTACH request %p", req);
648         return req;
649 }
650
651 static unsigned long lock_magic(struct ctdb_lock *lock)
652 {
653         /* A non-zero magic specific to this structure. */
654         return ((unsigned long)lock->key.dptr
655                 ^ (((unsigned long)lock->key.dptr) << 16)
656                 ^ 0xBADC0FFEEBADC0DEULL)
657                 | 1;
658 }
659
660 /* This is only called on locks before they're held. */
661 static void free_lock(struct ctdb_lock *lock)
662 {
663         if (lock->held_magic) {
664                 DEBUG(lock->ctdb_db->ctdb, LOG_ALERT,
665                       "free_lock invalid lock %p", lock);
666         }
667         free(lock->hdr);
668         free(lock);
669 }
670
671
672 void ctdb_release_lock(struct ctdb_db *ctdb_db, struct ctdb_lock *lock)
673 {
674         if (lock->held_magic != lock_magic(lock)) {
675                 DEBUG(lock->ctdb_db->ctdb, LOG_ALERT,
676                       "ctdb_release_lock invalid lock %p", lock);
677         } else if (lock->ctdb_db != ctdb_db) {
678                 errno = EBADF;
679                 DEBUG(ctdb_db->ctdb, LOG_ALERT,
680                       "ctdb_release_lock: wrong ctdb_db.");
681         } else {
682                 tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
683                 DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
684                       "ctdb_release_lock %p", lock);
685                 remove_lock(lock->ctdb_db->ctdb, lock);
686         }
687         lock->held_magic = 0;
688         free_lock(lock);
689 }
690
691
692 /* We keep the lock if local node is the dmaster. */
693 static bool try_readrecordlock(struct ctdb_lock *lock, TDB_DATA *data)
694 {
695         struct ctdb_ltdb_header *hdr;
696
697         if (tdb_chainlock(lock->ctdb_db->tdb, lock->key) != 0) {
698                 DEBUG(lock->ctdb_db->ctdb, LOG_WARNING,
699                       "ctdb_readrecordlock_async: failed to chainlock");
700                 return NULL;
701         }
702
703         hdr = ctdb_local_fetch(lock->ctdb_db->tdb, lock->key, data);
704         if (hdr && hdr->dmaster == lock->ctdb_db->ctdb->pnn) {
705                 DEBUG(lock->ctdb_db->ctdb, LOG_DEBUG,
706                       "ctdb_readrecordlock_async: got local lock");
707                 lock->held_magic = lock_magic(lock);
708                 lock->hdr = hdr;
709                 add_lock(lock->ctdb_db->ctdb, lock);
710                 return true;
711         }
712
713         tdb_chainunlock(lock->ctdb_db->tdb, lock->key);
714         free(hdr);
715         return NULL;
716 }
717
718 /* If they shutdown before we hand them the lock, we free it here. */
719 static void destroy_lock(struct ctdb_connection *ctdb,
720                          struct ctdb_request *req)
721 {
722         free_lock(req->extra);
723 }
724
725 static void readrecordlock_retry(struct ctdb_connection *ctdb,
726                                  struct ctdb_request *req, void *private)
727 {
728         struct ctdb_lock *lock = req->extra;
729         struct ctdb_reply_call *reply;
730         TDB_DATA data;
731
732         /* OK, we've received reply to noop migration */
733         reply = unpack_reply_call(ctdb, req, CTDB_NULL_FUNC);
734         if (!reply || reply->status != 0) {
735                 if (reply) {
736                         DEBUG(ctdb, LOG_ERR,
737                               "ctdb_readrecordlock_async(async):"
738                               " NULL_FUNC returned %i", reply->status);
739                 }
740                 lock->callback(lock->ctdb_db, NULL, tdb_null, private);
741                 ctdb_request_free(ctdb, req); /* Also frees lock. */
742                 return;
743         }
744
745         /* Can we get lock now? */
746         if (try_readrecordlock(lock, &data)) {
747                 /* Now it's their responsibility to free lock & request! */
748                 req->extra_destructor = NULL;
749                 lock->callback(lock->ctdb_db, lock, data, private);
750                 return;
751         }
752
753         /* Retransmit the same request again (we lost race). */
754         io_elem_reset(req->io);
755         DLIST_ADD(ctdb->outq, req);
756 }
757
758 bool
759 ctdb_readrecordlock_async(struct ctdb_db *ctdb_db, TDB_DATA key,
760                           ctdb_rrl_callback_t callback, void *cbdata)
761 {
762         struct ctdb_request *req;
763         struct ctdb_lock *lock;
764         TDB_DATA data;
765
766         if (holding_lock(ctdb_db->ctdb)) {
767                 DEBUG(ctdb_db->ctdb, LOG_ALERT,
768                       "ctdb_readrecordlock_async: already holding lock");
769                 return false;
770         }
771
772         /* Setup lock */
773         lock = malloc(sizeof(*lock) + key.dsize);
774         if (!lock) {
775                 DEBUG(ctdb_db->ctdb, LOG_ERR,
776                       "ctdb_readrecordlock_async: lock allocation failed");
777                 return false;
778         }
779         lock->key.dptr = (void *)(lock + 1);
780         memcpy(lock->key.dptr, key.dptr, key.dsize);
781         lock->key.dsize = key.dsize;
782         lock->ctdb_db = ctdb_db;
783         lock->hdr = NULL;
784         lock->held_magic = 0;
785
786         /* Fast path. */
787         if (try_readrecordlock(lock, &data)) {
788                 callback(ctdb_db, lock, data, cbdata);
789                 return true;
790         }
791
792         /* Slow path: create request. */
793         req = new_ctdb_request(offsetof(struct ctdb_req_call, data)
794                                + key.dsize, readrecordlock_retry, cbdata);
795         if (!req) {
796                 DEBUG(ctdb_db->ctdb, LOG_ERR,
797                       "ctdb_readrecordlock_async: allocation failed");
798                 free_lock(lock);
799                 return NULL;
800         }
801         req->extra = lock;
802         req->extra_destructor = destroy_lock;
803         /* We store the original callback in the lock, and use our own. */
804         lock->callback = callback;
805
806         io_elem_init_req_header(req->io, CTDB_REQ_CALL, CTDB_CURRENT_NODE,
807                                 new_reqid(ctdb_db->ctdb));
808
809         req->hdr.call->flags = CTDB_IMMEDIATE_MIGRATION;
810         req->hdr.call->db_id = ctdb_db->id;
811         req->hdr.call->callid = CTDB_NULL_FUNC;
812         req->hdr.call->hopcount = 0;
813         req->hdr.call->keylen = key.dsize;
814         req->hdr.call->calldatalen = 0;
815         memcpy(req->hdr.call->data, key.dptr, key.dsize);
816         DLIST_ADD(ctdb_db->ctdb->outq, req);
817         return true;
818 }
819
820 bool ctdb_writerecord(struct ctdb_db *ctdb_db,
821                       struct ctdb_lock *lock, TDB_DATA data)
822 {
823         if (lock->ctdb_db != ctdb_db) {
824                 errno = EBADF;
825                 DEBUG(ctdb_db->ctdb, LOG_ALERT,
826                       "ctdb_writerecord: Can not write, wrong ctdb_db.");
827                 return false;
828         }
829
830         if (lock->held_magic != lock_magic(lock)) {
831                 errno = EBADF;
832                 DEBUG(ctdb_db->ctdb, LOG_ALERT,
833                       "ctdb_writerecord: Can not write. Lock has been released.");
834                 return false;
835         }
836                 
837         if (ctdb_db->persistent) {
838                 errno = EINVAL;
839                 DEBUG(ctdb_db->ctdb, LOG_ALERT,
840                       "ctdb_writerecord: cannot write to persistent db");
841                 return false;
842         }
843
844         switch (ctdb_local_store(ctdb_db->tdb, lock->key, lock->hdr, data)) {
845         case 0:
846                 DEBUG(ctdb_db->ctdb, LOG_DEBUG,
847                       "ctdb_writerecord: optimized away noop write.");
848                 /* fall thru */
849         case 1:
850                 return true;
851
852         default:
853                 switch (errno) {
854                 case ENOMEM:
855                         DEBUG(ctdb_db->ctdb, LOG_CRIT,
856                               "ctdb_writerecord: out of memory.");
857                         break;
858                 case EINVAL:
859                         DEBUG(ctdb_db->ctdb, LOG_ALERT,
860                               "ctdb_writerecord: record changed under lock?");
861                         break;
862                 default: /* TDB already logged. */
863                         break;
864                 }
865                 return false;
866         }
867 }