4 Copyright (C) Andrew Tridgell 2007
5 Copyright (C) Ronnie Sahlberg 2007
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, see <http://www.gnu.org/licenses/>.
21 #include "lib/events/events.h"
22 #include "lib/tdb/include/tdb.h"
23 #include "system/time.h"
24 #include "system/network.h"
25 #include "system/filesys.h"
26 #include "system/wait.h"
27 #include "../include/ctdb_private.h"
28 #include "lib/util/dlinklist.h"
32 lock all databases - mark only
34 static int ctdb_lock_all_databases_mark(struct ctdb_context *ctdb)
36 struct ctdb_db_context *ctdb_db;
37 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
38 DEBUG(DEBUG_ERR,("Attempt to mark all databases locked when not frozen\n"));
41 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
42 if (tdb_lockall_mark(ctdb_db->ltdb->tdb) != 0) {
50 lock all databases - unmark only
52 static int ctdb_lock_all_databases_unmark(struct ctdb_context *ctdb)
54 struct ctdb_db_context *ctdb_db;
55 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
56 DEBUG(DEBUG_ERR,("Attempt to unmark all databases locked when not frozen\n"));
59 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
60 if (tdb_lockall_unmark(ctdb_db->ltdb->tdb) != 0) {
69 ctdb_control_getvnnmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
71 CHECK_CONTROL_DATA_SIZE(0);
72 struct ctdb_vnn_map_wire *map;
75 len = offsetof(struct ctdb_vnn_map_wire, map) + sizeof(uint32_t)*ctdb->vnn_map->size;
76 map = talloc_size(outdata, len);
77 CTDB_NO_MEMORY(ctdb, map);
79 map->generation = ctdb->vnn_map->generation;
80 map->size = ctdb->vnn_map->size;
81 memcpy(map->map, ctdb->vnn_map->map, sizeof(uint32_t)*map->size);
84 outdata->dptr = (uint8_t *)map;
90 ctdb_control_setvnnmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
92 struct ctdb_vnn_map_wire *map = (struct ctdb_vnn_map_wire *)indata.dptr;
94 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
95 DEBUG(DEBUG_ERR,("Attempt to set vnnmap when not frozen\n"));
99 talloc_free(ctdb->vnn_map);
101 ctdb->vnn_map = talloc(ctdb, struct ctdb_vnn_map);
102 CTDB_NO_MEMORY(ctdb, ctdb->vnn_map);
104 ctdb->vnn_map->generation = map->generation;
105 ctdb->vnn_map->size = map->size;
106 ctdb->vnn_map->map = talloc_array(ctdb->vnn_map, uint32_t, map->size);
107 CTDB_NO_MEMORY(ctdb, ctdb->vnn_map->map);
109 memcpy(ctdb->vnn_map->map, map->map, sizeof(uint32_t)*map->size);
115 ctdb_control_getdbmap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
118 struct ctdb_db_context *ctdb_db;
119 struct ctdb_dbid_map *dbid_map;
121 CHECK_CONTROL_DATA_SIZE(0);
124 for(ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next){
129 outdata->dsize = offsetof(struct ctdb_dbid_map, dbs) + sizeof(dbid_map->dbs[0])*len;
130 outdata->dptr = (unsigned char *)talloc_zero_size(outdata, outdata->dsize);
131 if (!outdata->dptr) {
132 DEBUG(DEBUG_ALERT, (__location__ " Failed to allocate dbmap array\n"));
136 dbid_map = (struct ctdb_dbid_map *)outdata->dptr;
138 for (i=0,ctdb_db=ctdb->db_list;ctdb_db;i++,ctdb_db=ctdb_db->next){
139 dbid_map->dbs[i].dbid = ctdb_db->db_id;
140 dbid_map->dbs[i].persistent = ctdb_db->persistent;
147 ctdb_control_getnodemap(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata, TDB_DATA *outdata)
149 uint32_t i, num_nodes;
150 struct ctdb_node_map *node_map;
152 CHECK_CONTROL_DATA_SIZE(0);
154 num_nodes = ctdb->num_nodes;
156 outdata->dsize = offsetof(struct ctdb_node_map, nodes) + num_nodes*sizeof(struct ctdb_node_and_flags);
157 outdata->dptr = (unsigned char *)talloc_zero_size(outdata, outdata->dsize);
158 if (!outdata->dptr) {
159 DEBUG(DEBUG_ALERT, (__location__ " Failed to allocate nodemap array\n"));
163 node_map = (struct ctdb_node_map *)outdata->dptr;
164 node_map->num = num_nodes;
165 for (i=0; i<num_nodes; i++) {
166 if (parse_ip(ctdb->nodes[i]->address.address, &node_map->nodes[i].addr) == 0) {
167 DEBUG(DEBUG_ERR, (__location__ " Failed to parse %s into a sockaddr\n", ctdb->nodes[i]->address.address));
170 node_map->nodes[i].pnn = ctdb->nodes[i]->pnn;
171 node_map->nodes[i].flags = ctdb->nodes[i]->flags;
178 ctdb_reload_nodes_event(struct event_context *ev, struct timed_event *te,
179 struct timeval t, void *private_data)
182 struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
183 int ctdb_tcp_init(struct ctdb_context *);
185 /* shut down the transport */
186 if (ctdb->methods != NULL) {
187 ctdb->methods->shutdown(ctdb);
190 /* start the transport again */
191 ctdb_load_nodes_file(ctdb);
192 ret = ctdb_tcp_init(ctdb);
194 DEBUG(DEBUG_CRIT, (__location__ " Failed to init TCP\n"));
198 if (ctdb->methods == NULL) {
199 DEBUG(DEBUG_ALERT,(__location__ " Can not restart transport. ctdb->methods==NULL\n"));
200 ctdb_fatal(ctdb, "can not reinitialize transport.");
202 ctdb->methods->initialise(ctdb);
203 ctdb->methods->start(ctdb);
209 reload the nodes file after a short delay (so that we can send the response
213 ctdb_control_reload_nodes_file(struct ctdb_context *ctdb, uint32_t opcode)
215 event_add_timed(ctdb->ev, ctdb, timeval_current_ofs(1,0), ctdb_reload_nodes_event, ctdb);
/*
  State shared between ctdb_control_pull_db() and the traverse function
  that pulls all relevant records from the database (traverse_pulldb).
 */
struct pulldb_data {
	/* daemon context */
	struct ctdb_context *ctdb;
	/* reply blob being built; grows as records are appended */
	struct ctdb_marshall_buffer *pulldata;
	/* number of bytes of pulldata used so far */
	uint32_t len;
	/* set by the traverse function when a record could not be added */
	bool failed;
};
230 static int traverse_pulldb(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
232 struct pulldb_data *params = (struct pulldb_data *)p;
233 struct ctdb_rec_data *rec;
235 /* add the record to the blob */
236 rec = ctdb_marshall_record(params->pulldata, 0, key, NULL, data);
238 params->failed = true;
241 params->pulldata = talloc_realloc_size(NULL, params->pulldata, rec->length + params->len);
242 if (params->pulldata == NULL) {
243 DEBUG(DEBUG_ERR,(__location__ " Failed to expand pulldb_data to %u (%u records)\n",
244 rec->length + params->len, params->pulldata->count));
245 params->failed = true;
248 params->pulldata->count++;
249 memcpy(params->len+(uint8_t *)params->pulldata, rec, rec->length);
250 params->len += rec->length;
257 pul a bunch of records from a ltdb, filtering by lmaster
259 int32_t ctdb_control_pull_db(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
261 struct ctdb_control_pulldb *pull;
262 struct ctdb_db_context *ctdb_db;
263 struct pulldb_data params;
264 struct ctdb_marshall_buffer *reply;
266 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
267 DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_pull_db when not frozen\n"));
271 pull = (struct ctdb_control_pulldb *)indata.dptr;
273 ctdb_db = find_ctdb_db(ctdb, pull->db_id);
275 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", pull->db_id));
279 reply = talloc_zero(outdata, struct ctdb_marshall_buffer);
280 CTDB_NO_MEMORY(ctdb, reply);
282 reply->db_id = pull->db_id;
285 params.pulldata = reply;
286 params.len = offsetof(struct ctdb_marshall_buffer, data);
287 params.failed = false;
289 if (ctdb_lock_all_databases_mark(ctdb) != 0) {
290 DEBUG(DEBUG_ERR,(__location__ " Failed to get lock on entired db - failing\n"));
294 if (tdb_traverse_read(ctdb_db->ltdb->tdb, traverse_pulldb, ¶ms) == -1) {
295 DEBUG(DEBUG_ERR,(__location__ " Failed to get traverse db '%s'\n", ctdb_db->db_name));
296 ctdb_lock_all_databases_unmark(ctdb);
297 talloc_free(params.pulldata);
301 ctdb_lock_all_databases_unmark(ctdb);
303 outdata->dptr = (uint8_t *)params.pulldata;
304 outdata->dsize = params.len;
310 push a bunch of records into a ltdb, filtering by rsn
312 int32_t ctdb_control_push_db(struct ctdb_context *ctdb, TDB_DATA indata)
314 struct ctdb_marshall_buffer *reply = (struct ctdb_marshall_buffer *)indata.dptr;
315 struct ctdb_db_context *ctdb_db;
317 struct ctdb_rec_data *rec;
319 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
320 DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_push_db when not frozen\n"));
324 if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) {
325 DEBUG(DEBUG_ERR,(__location__ " invalid data in pulldb reply\n"));
329 ctdb_db = find_ctdb_db(ctdb, reply->db_id);
331 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", reply->db_id));
335 if (ctdb_lock_all_databases_mark(ctdb) != 0) {
336 DEBUG(DEBUG_ERR,(__location__ " Failed to get lock on entired db - failing\n"));
340 rec = (struct ctdb_rec_data *)&reply->data[0];
342 DEBUG(DEBUG_INFO,("starting push of %u records for dbid 0x%x\n",
343 reply->count, reply->db_id));
345 for (i=0;i<reply->count;i++) {
347 struct ctdb_ltdb_header *hdr;
349 key.dptr = &rec->data[0];
350 key.dsize = rec->keylen;
351 data.dptr = &rec->data[key.dsize];
352 data.dsize = rec->datalen;
354 if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
355 DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record\n"));
358 hdr = (struct ctdb_ltdb_header *)data.dptr;
359 data.dptr += sizeof(*hdr);
360 data.dsize -= sizeof(*hdr);
362 ret = ctdb_ltdb_store(ctdb_db, key, hdr, data);
364 DEBUG(DEBUG_CRIT, (__location__ " Unable to store record\n"));
368 rec = (struct ctdb_rec_data *)(rec->length + (uint8_t *)rec);
371 DEBUG(DEBUG_DEBUG,("finished push of %u records for dbid 0x%x\n",
372 reply->count, reply->db_id));
374 ctdb_lock_all_databases_unmark(ctdb);
378 ctdb_lock_all_databases_unmark(ctdb);
383 static int traverse_setdmaster(struct tdb_context *tdb, TDB_DATA key, TDB_DATA data, void *p)
385 uint32_t *dmaster = (uint32_t *)p;
386 struct ctdb_ltdb_header *header = (struct ctdb_ltdb_header *)data.dptr;
389 /* skip if already correct */
390 if (header->dmaster == *dmaster) {
394 header->dmaster = *dmaster;
396 ret = tdb_store(tdb, key, data, TDB_REPLACE);
398 DEBUG(DEBUG_CRIT,(__location__ " failed to write tdb data back ret:%d\n",ret));
402 /* TODO: add error checking here */
407 int32_t ctdb_control_set_dmaster(struct ctdb_context *ctdb, TDB_DATA indata)
409 struct ctdb_control_set_dmaster *p = (struct ctdb_control_set_dmaster *)indata.dptr;
410 struct ctdb_db_context *ctdb_db;
412 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
413 DEBUG(DEBUG_DEBUG,("rejecting ctdb_control_set_dmaster when not frozen\n"));
417 ctdb_db = find_ctdb_db(ctdb, p->db_id);
419 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", p->db_id));
423 if (ctdb_lock_all_databases_mark(ctdb) != 0) {
424 DEBUG(DEBUG_ERR,(__location__ " Failed to get lock on entired db - failing\n"));
428 tdb_traverse(ctdb_db->ltdb->tdb, traverse_setdmaster, &p->dmaster);
430 ctdb_lock_all_databases_unmark(ctdb);
/*
  State for an asynchronous "set recovery mode" control while a child
  process tests the recovery lock.
 */
struct ctdb_set_recmode_state {
	/* daemon context */
	struct ctdb_context *ctdb;
	/* the control request to reply to once the child has reported */
	struct ctdb_req_control *c;
	/* recovery mode to install if the check passes */
	uint32_t recmode;
	/* pipe between child (writes fd[1]) and daemon (reads fd[0]) */
	int fd[2];
	/* timeout that fires if the child does not answer */
	struct timed_event *te;
	/* read event on fd[0] */
	struct fd_event *fde;
	/* pid of the child process */
	pid_t child;
};
446 called if our set_recmode child times out. this would happen if
447 ctdb_recovery_lock() would block.
449 static void ctdb_set_recmode_timeout(struct event_context *ev, struct timed_event *te,
450 struct timeval t, void *private_data)
452 struct ctdb_set_recmode_state *state = talloc_get_type(private_data,
453 struct ctdb_set_recmode_state);
455 ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "timeout in ctdb_set_recmode");
460 /* when we free the recmode state we must kill any child process.
462 static int set_recmode_destructor(struct ctdb_set_recmode_state *state)
464 kill(state->child, SIGKILL);
468 /* this is called when the client process has completed ctdb_recovery_lock()
469 and has written data back to us through the pipe.
471 static void set_recmode_handler(struct event_context *ev, struct fd_event *fde,
472 uint16_t flags, void *private_data)
474 struct ctdb_set_recmode_state *state= talloc_get_type(private_data,
475 struct ctdb_set_recmode_state);
479 /* we got a response from our child process so we can abort the
482 talloc_free(state->te);
486 /* read the childs status when trying to lock the reclock file.
487 child wrote 0 if everything is fine and 1 if it did manage
488 to lock the file, which would be a problem since that means
489 we got a request to exit from recovery but we could still lock
490 the file which at this time SHOULD be locked by the recovery
491 daemon on the recmaster
493 ret = read(state->fd[0], &c, 1);
494 if (ret != 1 || c != 0) {
495 ctdb_request_control_reply(state->ctdb, state->c, NULL, -1, "managed to lock reclock file from inside daemon");
500 state->ctdb->recovery_mode = state->recmode;
502 ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
508 set the recovery mode
510 int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
511 struct ctdb_req_control *c,
512 TDB_DATA indata, bool *async_reply,
513 const char **errormsg)
515 uint32_t recmode = *(uint32_t *)indata.dptr;
517 struct ctdb_set_recmode_state *state;
518 pid_t parent = getpid();
520 if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
521 DEBUG(DEBUG_ERR,("Attempt to change recovery mode to %u when not frozen\n",
523 (*errormsg) = "Cannot change recovery mode while not frozen";
527 if (recmode != ctdb->recovery_mode) {
528 DEBUG(DEBUG_NOTICE,(__location__ " Recovery mode set to %s\n",
529 recmode==CTDB_RECOVERY_NORMAL?"NORMAL":"ACTIVE"));
532 if (recmode != CTDB_RECOVERY_NORMAL ||
533 ctdb->recovery_mode != CTDB_RECOVERY_ACTIVE) {
534 ctdb->recovery_mode = recmode;
538 /* some special handling when ending recovery mode */
540 /* force the databased to thaw */
541 if (ctdb->freeze_handle) {
542 ctdb_control_thaw(ctdb);
545 state = talloc(ctdb, struct ctdb_set_recmode_state);
546 CTDB_NO_MEMORY(ctdb, state);
548 /* For the rest of what needs to be done, we need to do this in
549 a child process since
550 1, the call to ctdb_recovery_lock() can block if the cluster
551 filesystem is in the process of recovery.
552 2, running of the script may take a while.
554 ret = pipe(state->fd);
557 DEBUG(DEBUG_CRIT,(__location__ " Failed to open pipe for set_recmode child\n"));
561 state->child = fork();
562 if (state->child == (pid_t)-1) {
569 if (state->child == 0) {
573 /* we should not be able to get the lock on the nodes list,
574 as it should be held by the recovery master
576 if (ctdb_recovery_lock(ctdb, false)) {
577 DEBUG(DEBUG_CRIT,("ERROR: recovery lock file %s not locked when recovering!\n", ctdb->recovery_lock_file));
581 write(state->fd[1], &cc, 1);
582 /* make sure we die when our parent dies */
583 while (kill(parent, 0) == 0 || errno != ESRCH) {
590 talloc_set_destructor(state, set_recmode_destructor);
592 state->te = event_add_timed(ctdb->ev, state, timeval_current_ofs(3, 0),
593 ctdb_set_recmode_timeout, state);
595 state->fde = event_add_fd(ctdb->ev, state, state->fd[0],
596 EVENT_FD_READ|EVENT_FD_AUTOCLOSE,
599 if (state->fde == NULL) {
605 state->recmode = recmode;
606 state->c = talloc_steal(state, c);
615 try and get the recovery lock in shared storage - should only work
616 on the recovery master recovery daemon. Anywhere else is a bug
618 bool ctdb_recovery_lock(struct ctdb_context *ctdb, bool keep)
622 if (ctdb->recovery_lock_fd != -1) {
623 close(ctdb->recovery_lock_fd);
625 ctdb->recovery_lock_fd = open(ctdb->recovery_lock_file, O_RDWR|O_CREAT, 0600);
626 if (ctdb->recovery_lock_fd == -1) {
627 DEBUG(DEBUG_ERR,("ctdb_recovery_lock: Unable to open %s - (%s)\n",
628 ctdb->recovery_lock_file, strerror(errno)));
632 set_close_on_exec(ctdb->recovery_lock_fd);
634 lock.l_type = F_WRLCK;
635 lock.l_whence = SEEK_SET;
640 if (fcntl(ctdb->recovery_lock_fd, F_SETLK, &lock) != 0) {
641 close(ctdb->recovery_lock_fd);
642 ctdb->recovery_lock_fd = -1;
644 DEBUG(DEBUG_CRIT,("ctdb_recovery_lock: Failed to get recovery lock on '%s'\n", ctdb->recovery_lock_file));
650 close(ctdb->recovery_lock_fd);
651 ctdb->recovery_lock_fd = -1;
654 DEBUG(DEBUG_NOTICE,("ctdb_recovery_lock: Got recovery lock on '%s'\n", ctdb->recovery_lock_file));
660 delete a record as part of the vacuum process
661 only delete if we are not lmaster or dmaster, and our rsn is <= the provided rsn
662 use non-blocking locks
664 return 0 if the record was successfully deleted (i.e. it does not exist
665 when the function returns)
666 or !0 is the record still exists in the tdb after returning.
668 static int delete_tdb_record(struct ctdb_context *ctdb, struct ctdb_db_context *ctdb_db, struct ctdb_rec_data *rec)
671 struct ctdb_ltdb_header *hdr, *hdr2;
673 /* these are really internal tdb functions - but we need them here for
674 non-blocking lock of the freelist */
675 int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype);
676 int tdb_unlock(struct tdb_context *tdb, int list, int ltype);
679 key.dsize = rec->keylen;
680 key.dptr = &rec->data[0];
681 data.dsize = rec->datalen;
682 data.dptr = &rec->data[rec->keylen];
684 if (ctdb_lmaster(ctdb, &key) == ctdb->pnn) {
685 DEBUG(DEBUG_INFO,(__location__ " Called delete on record where we are lmaster\n"));
689 if (data.dsize != sizeof(struct ctdb_ltdb_header)) {
690 DEBUG(DEBUG_ERR,(__location__ " Bad record size\n"));
694 hdr = (struct ctdb_ltdb_header *)data.dptr;
696 /* use a non-blocking lock */
697 if (tdb_chainlock_nonblock(ctdb_db->ltdb->tdb, key) != 0) {
701 data = tdb_fetch(ctdb_db->ltdb->tdb, key);
702 if (data.dptr == NULL) {
703 tdb_chainunlock(ctdb_db->ltdb->tdb, key);
707 if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
708 if (tdb_lock_nonblock(ctdb_db->ltdb->tdb, -1, F_WRLCK) == 0) {
709 tdb_delete(ctdb_db->ltdb->tdb, key);
710 tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
711 DEBUG(DEBUG_CRIT,(__location__ " Deleted corrupt record\n"));
713 tdb_chainunlock(ctdb_db->ltdb->tdb, key);
718 hdr2 = (struct ctdb_ltdb_header *)data.dptr;
720 if (hdr2->rsn > hdr->rsn) {
721 tdb_chainunlock(ctdb_db->ltdb->tdb, key);
722 DEBUG(DEBUG_INFO,(__location__ " Skipping record with rsn=%llu - called with rsn=%llu\n",
723 (unsigned long long)hdr2->rsn, (unsigned long long)hdr->rsn));
728 if (hdr2->dmaster == ctdb->pnn) {
729 tdb_chainunlock(ctdb_db->ltdb->tdb, key);
730 DEBUG(DEBUG_INFO,(__location__ " Attempted delete record where we are the dmaster\n"));
735 if (tdb_lock_nonblock(ctdb_db->ltdb->tdb, -1, F_WRLCK) != 0) {
736 tdb_chainunlock(ctdb_db->ltdb->tdb, key);
741 if (tdb_delete(ctdb_db->ltdb->tdb, key) != 0) {
742 tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
743 tdb_chainunlock(ctdb_db->ltdb->tdb, key);
744 DEBUG(DEBUG_INFO,(__location__ " Failed to delete record\n"));
749 tdb_unlock(ctdb_db->ltdb->tdb, -1, F_WRLCK);
750 tdb_chainunlock(ctdb_db->ltdb->tdb, key);
/*
  State carried from a start/end recovery control to the event script
  completion callback.
 */
struct recovery_callback_state {
	/* the control request to answer when the script has finished */
	struct ctdb_req_control *c;
};
763 called when the 'recovered' event script has finished
765 static void ctdb_end_recovery_callback(struct ctdb_context *ctdb, int status, void *p)
767 struct recovery_callback_state *state = talloc_get_type(p, struct recovery_callback_state);
769 ctdb_enable_monitoring(ctdb);
772 DEBUG(DEBUG_ERR,(__location__ " recovered event script failed (status %d)\n", status));
775 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
778 gettimeofday(&ctdb->last_recovery_finished, NULL);
782 recovery has finished
784 int32_t ctdb_control_end_recovery(struct ctdb_context *ctdb,
785 struct ctdb_req_control *c,
789 struct recovery_callback_state *state;
791 DEBUG(DEBUG_NOTICE,("Recovery has finished\n"));
793 state = talloc(ctdb, struct recovery_callback_state);
794 CTDB_NO_MEMORY(ctdb, state);
796 state->c = talloc_steal(state, c);
798 ctdb_disable_monitoring(ctdb);
800 ret = ctdb_event_script_callback(ctdb,
801 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
803 ctdb_end_recovery_callback,
807 ctdb_enable_monitoring(ctdb);
809 DEBUG(DEBUG_ERR,(__location__ " Failed to end recovery\n"));
814 /* tell the control that we will be reply asynchronously */
820 called when the 'startrecovery' event script has finished
822 static void ctdb_start_recovery_callback(struct ctdb_context *ctdb, int status, void *p)
824 struct recovery_callback_state *state = talloc_get_type(p, struct recovery_callback_state);
827 DEBUG(DEBUG_ERR,(__location__ " startrecovery event script failed (status %d)\n", status));
830 ctdb_request_control_reply(ctdb, state->c, NULL, status, NULL);
835 run the startrecovery eventscript
837 int32_t ctdb_control_start_recovery(struct ctdb_context *ctdb,
838 struct ctdb_req_control *c,
842 struct recovery_callback_state *state;
844 DEBUG(DEBUG_NOTICE,(__location__ " startrecovery eventscript has been invoked\n"));
845 gettimeofday(&ctdb->last_recovery_started, NULL);
847 state = talloc(ctdb, struct recovery_callback_state);
848 CTDB_NO_MEMORY(ctdb, state);
850 state->c = talloc_steal(state, c);
852 ctdb_disable_monitoring(ctdb);
854 ret = ctdb_event_script_callback(ctdb,
855 timeval_current_ofs(ctdb->tunable.script_timeout, 0),
857 ctdb_start_recovery_callback,
858 state, "startrecovery");
861 DEBUG(DEBUG_ERR,(__location__ " Failed to start recovery\n"));
866 /* tell the control that we will be reply asynchronously */
872 try to delete all these records as part of the vacuuming process
873 and return the records we failed to delete
875 int32_t ctdb_control_try_delete_records(struct ctdb_context *ctdb, TDB_DATA indata, TDB_DATA *outdata)
877 struct ctdb_marshall_buffer *reply = (struct ctdb_marshall_buffer *)indata.dptr;
878 struct ctdb_db_context *ctdb_db;
880 struct ctdb_rec_data *rec;
881 struct ctdb_marshall_buffer *records;
883 if (indata.dsize < offsetof(struct ctdb_marshall_buffer, data)) {
884 DEBUG(DEBUG_ERR,(__location__ " invalid data in try_delete_records\n"));
888 ctdb_db = find_ctdb_db(ctdb, reply->db_id);
890 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%08x\n", reply->db_id));
895 DEBUG(DEBUG_DEBUG,("starting try_delete_records of %u records for dbid 0x%x\n",
896 reply->count, reply->db_id));
899 /* create a blob to send back the records we couldnt delete */
900 records = (struct ctdb_marshall_buffer *)
901 talloc_zero_size(outdata,
902 offsetof(struct ctdb_marshall_buffer, data));
903 if (records == NULL) {
904 DEBUG(DEBUG_ERR,(__location__ " Out of memory\n"));
907 records->db_id = ctdb_db->db_id;
910 rec = (struct ctdb_rec_data *)&reply->data[0];
911 for (i=0;i<reply->count;i++) {
914 key.dptr = &rec->data[0];
915 key.dsize = rec->keylen;
916 data.dptr = &rec->data[key.dsize];
917 data.dsize = rec->datalen;
919 if (data.dsize < sizeof(struct ctdb_ltdb_header)) {
920 DEBUG(DEBUG_CRIT,(__location__ " bad ltdb record in indata\n"));
924 /* If we cant delete the record we must add it to the reply
925 so the lmaster knows it may not purge this record
927 if (delete_tdb_record(ctdb, ctdb_db, rec) != 0) {
929 struct ctdb_ltdb_header *hdr;
931 hdr = (struct ctdb_ltdb_header *)data.dptr;
932 data.dptr += sizeof(*hdr);
933 data.dsize -= sizeof(*hdr);
935 DEBUG(DEBUG_INFO, (__location__ " Failed to vacuum delete record with hash 0x%08x\n", ctdb_hash(&key)));
937 old_size = talloc_get_size(records);
938 records = talloc_realloc_size(outdata, records, old_size + rec->length);
939 if (records == NULL) {
940 DEBUG(DEBUG_ERR,(__location__ " Failed to expand\n"));
944 memcpy(old_size+(uint8_t *)records, rec, rec->length);
947 rec = (struct ctdb_rec_data *)(rec->length + (uint8_t *)rec);
951 outdata->dptr = (uint8_t *)records;
952 outdata->dsize = talloc_get_size(records);
960 int32_t ctdb_control_get_capabilities(struct ctdb_context *ctdb, TDB_DATA *outdata)
962 uint32_t *capabilities = NULL;
964 capabilities = talloc(outdata, uint32_t);
965 CTDB_NO_MEMORY(ctdb, capabilities);
966 *capabilities = ctdb->capabilities;
968 outdata->dsize = sizeof(uint32_t);
969 outdata->dptr = (uint8_t *)capabilities;
974 static void ctdb_recd_ping_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *p)
976 struct ctdb_context *ctdb = talloc_get_type(p, struct ctdb_context);
977 uint32_t *count = talloc_get_type(ctdb->recd_ping_count, uint32_t);
979 DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Count : %u\n", *count));
981 if (*count < ctdb->tunable.recd_ping_failcount) {
983 event_add_timed(ctdb->ev, ctdb->recd_ping_count,
984 timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
985 ctdb_recd_ping_timeout, ctdb);
989 DEBUG(DEBUG_ERR, (__location__ " Final timeout for recovery daemon ping. Shutting down ctdb daemon\n"));
991 ctdb_stop_recoverd(ctdb);
992 ctdb_stop_keepalive(ctdb);
993 ctdb_stop_monitoring(ctdb);
994 ctdb_release_all_ips(ctdb);
995 if (ctdb->methods != NULL) {
996 ctdb->methods->shutdown(ctdb);
998 ctdb_event_script(ctdb, "shutdown");
999 DEBUG(DEBUG_ERR, (__location__ " Recovery daemon ping timeout. Daemon has been shut down.\n"));
1003 /* The recovery daemon will ping us at regular intervals.
1004 If we havent been pinged for a while we assume the recovery
1005 daemon is inoperable and we shut down.
1007 int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb)
1009 talloc_free(ctdb->recd_ping_count);
1011 ctdb->recd_ping_count = talloc_zero(ctdb, uint32_t);
1012 CTDB_NO_MEMORY(ctdb, ctdb->recd_ping_count);
1014 if (ctdb->tunable.recd_ping_timeout != 0) {
1015 event_add_timed(ctdb->ev, ctdb->recd_ping_count,
1016 timeval_current_ofs(ctdb->tunable.recd_ping_timeout, 0),
1017 ctdb_recd_ping_timeout, ctdb);