4 Copyright (C) Andrew Tridgell 2007
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
20 #include "lib/tevent/tevent.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
28 #include "../common/rb_tree.h"
/*
  Return true when the database name contains "notify" or "serverid".
  ctdb_lock_all_databases() uses this test to decide which of its two
  passes over the database list handles a given database.
 */
30 static bool later_db(const char *name)
32 return (strstr(name, "notify") || strstr(name, "serverid"));
/*
  Take tdb_lockall() on the databases in ctdb->db_list, looking at the
  databases' priority relative to the 'priority' argument.

  The list is walked twice: the first pass tests later_db(db_name), the
  second pass tests !later_db(db_name). Each tdb_lockall() is announced at
  DEBUG_INFO and a failing tdb_lockall() is logged at DEBUG_ERR.

  NOTE(review): the statements guarded by the priority / later_db() tests
  and the function's return statements are not visible in this excerpt -
  confirm against the complete source which databases each pass skips and
  what is returned on failure.
 */
38 static int ctdb_lock_all_databases(struct ctdb_context *ctdb, uint32_t priority)
40 struct ctdb_db_context *ctdb_db;
42 /* This double loop is for backward compatibility and deadlock
43 avoidance for old samba versions that not yet support
45 This code shall be removed later
47 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
48 if (ctdb_db->priority != priority) {
51 if (later_db(ctdb_db->db_name)) {
54 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
55 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
56 DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
60 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
61 if (ctdb_db->priority != priority) {
64 if (!later_db(ctdb_db->db_name)) {
67 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
68 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
69 DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
77 a list of control requests waiting for a freeze lock child to get
/*
  One entry in a freeze handle's 'waiters' list. ctdb_control_freeze()
  allocates a waiter for a control request and
  ctdb_freeze_waiter_destructor() sends the reply for it.

  NOTE(review): code elsewhere in this file also uses w->priority and
  w->status; their declarations are not visible in this excerpt.
 */
80 struct ctdb_freeze_waiter {
/* list linkage, used with DLIST_ADD() / DLIST_REMOVE() */
81 struct ctdb_freeze_waiter *next, *prev;
82 struct ctdb_context *ctdb;
/* the waiting control request; ctdb_control_freeze() talloc_steal()s it onto the waiter */
83 struct ctdb_req_control *c;
88 /* a handle to a freeze lock child process */
/*
  Created by ctdb_freeze_lock() and stored in ctdb->freeze_handles[priority].

  NOTE(review): code elsewhere in this file also uses h->child, h->fd and
  h->priority; their declarations are not visible in this excerpt.
 */
89 struct ctdb_freeze_handle {
90 struct ctdb_context *ctdb;
/* control requests parked on this handle; ctdb_freeze_lock_handler() unlinks them */
94 struct ctdb_freeze_waiter *waiters;
98 destroy a freeze handle
/*
  talloc destructor for a freeze handle (installed by ctdb_freeze_lock()).

  If a freeze transaction has been started, walks ctdb->db_list (testing
  each database's priority against h->priority) and cancels the tdb
  transaction, then clears freeze_transaction_started. Afterwards the
  freeze mode and the handle slot for h->priority are reset and the child
  process is sent SIGKILL.
 */
100 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
102 struct ctdb_context *ctdb = h->ctdb;
103 struct ctdb_db_context *ctdb_db;
/* NOTE(review): logged at DEBUG_ERR although releasing a handle is not obviously an error - confirm intent */
105 DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
107 /* cancel any pending transactions */
108 if (ctdb->freeze_transaction_started) {
109 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
110 if (ctdb_db->priority != h->priority) {
/* TDB_NOLOCK is set only around the cancel call and removed again below */
113 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
114 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
115 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
118 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
120 ctdb->freeze_transaction_started = false;
/* this priority is no longer frozen and no longer has a handle */
123 ctdb->freeze_mode[h->priority] = CTDB_FREEZE_NONE;
124 ctdb->freeze_handles[h->priority] = NULL;
/* h->child is the pid returned by ctdb_fork() in ctdb_freeze_lock() */
126 kill(h->child, SIGKILL);
131 called when the child writes its status to us
/*
  fd event handler for the read end of the freeze child's pipe; registered
  by ctdb_freeze_lock() with the freeze handle as private_data.

  Reads a status value from h->fd. When the status is accepted the freeze
  mode for h->priority becomes CTDB_FREEZE_FROZEN and the handle's waiters
  are unlinked from the list.
 */
133 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde,
134 uint16_t flags, void *private_data)
136 struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
138 struct ctdb_freeze_waiter *w;
/* the fd becoming readable while we are already FROZEN is treated as the child having died */
140 if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
141 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
/* anything other than a full sizeof(status) read counts as a read error */
146 if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
147 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
152 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
153 /* we didn't get the locks - destroy the handle */
158 h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
160 /* notify the waiters */
/* sanity check: this handle should be the one registered for its priority */
161 if (h != h->ctdb->freeze_handles[h->priority]) {
162 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
/* drain the list from the head; NOTE(review): what else is done with each waiter is not visible in this excerpt */
164 while ((w = h->waiters)) {
166 DLIST_REMOVE(h->waiters, w);
172 create a child which gets locks on all the open databases, then calls the callback telling the parent
/*
  Create a freeze handle and a child process that locks the databases of
  the given priority, reporting back to the parent over a pipe.

  Child side: calls ctdb_lock_all_databases(), writes the result to fd[1]
  and checks with kill(ctdb->ctdbd_pid, 0) whether the parent still exists.

  Parent side: installs ctdb_freeze_handle_destructor on the handle, marks
  fd[0] close-on-exec and registers ctdb_freeze_lock_handler for read
  events on h->fd.

  The pipe(), ctdb_fork() and event_add_fd() failure paths each log at
  DEBUG_ERR. NOTE(review): the clean-up and return statements on those
  paths are not visible in this excerpt - confirm that the handle and both
  pipe fds are released on every failure path.
 */
175 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, uint32_t priority)
177 struct ctdb_freeze_handle *h;
179 struct fd_event *fde;
/* the handle is allocated as a talloc child of ctdb */
181 h = talloc_zero(ctdb, struct ctdb_freeze_handle);
182 CTDB_NO_MEMORY_NULL(ctdb, h);
185 h->priority = priority;
187 if (pipe(fd) == -1) {
188 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
193 h->child = ctdb_fork(ctdb);
194 if (h->child == -1) {
195 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
/* debug_extra presumably prefixes this process's log output - TODO confirm */
206 debug_extra = talloc_asprintf(NULL, "freeze_lock-%u:", priority);
207 ret = ctdb_lock_all_databases(ctdb, priority);
/* ret is reused: first it is the value written, then the byte count returned by write() */
212 ret = write(fd[1], &ret, sizeof(ret));
213 if (ret != sizeof(ret)) {
214 DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
/* signal 0 sends nothing; it only tests whether the parent pid still exists */
220 if (kill(ctdb->ctdbd_pid, 0) != 0) {
221 DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
228 talloc_set_destructor(h, ctdb_freeze_handle_destructor);
231 set_close_on_exec(fd[0]);
/* the fd event is a talloc child of h, so it goes away together with the handle */
236 fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ,
237 ctdb_freeze_lock_handler, h);
239 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
/* have tevent close h->fd when the fd event is freed */
244 tevent_fd_set_auto_close(fde);
250 destroy a waiter for a freeze mode change
/*
  talloc destructor for a freeze waiter (installed by ctdb_control_freeze()).
  Replies to the stored control request via ctdb_request_control_reply(),
  passing w->status; the other data arguments are NULL.
 */
252 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
254 ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
259 * Run an external script to check if there is a deadlock situation
/*
  Execute the program named by the CTDB_DEBUG_LOCKS environment variable.
  The program is exec'd with argv[0] set to the same path and no further
  arguments.

  NOTE(review): handling of an unset CTDB_DEBUG_LOCKS and the creation of
  the child process are not visible in this excerpt - confirm.
 */
261 static void ctdb_debug_locks(void)
263 const char *cmd = getenv("CTDB_DEBUG_LOCKS");
272 /* Execute only in child process */
/* execl() searches no PATH: cmd must be a usable pathname */
274 execl(cmd, cmd, NULL);
279 start the freeze process for a certain priority
/*
  Start freezing the databases of one priority level.

  priority must lie in 1..NUM_DB_PRIORITIES; anything else is logged and
  ctdb_fatal() is called. The already-FROZEN case is detected up front.
  Otherwise vacuuming is stopped and, if no freeze handle exists for this
  priority, a lock child is created with ctdb_freeze_lock() and the freeze
  mode is set to CTDB_FREEZE_PENDING.

  Callers in this file treat a non-zero return as failure.
 */
281 int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
283 if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
284 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
285 ctdb_fatal(ctdb, "Internal error");
288 if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
289 /* we're already frozen */
/* NOTE(review): logged at DEBUG_ERR although starting a freeze is not an error - confirm intent */
293 DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
295 /* Stop any vacuuming going on: we don't want to wait. */
296 ctdb_stop_vacuuming(ctdb);
298 /* if there isn't a freeze lock child then create one */
299 if (ctdb->freeze_handles[priority] == NULL) {
300 ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
301 CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
302 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
304 /* The previous free lock child has not yet been able to get locks.
305 * Invoke debugging script */
/*
  Handle a freeze control request.

  The priority is c->srvid cast to uint32_t; per the log message a
  priority of 0 is remapped to 1, and values outside
  1..NUM_DB_PRIORITIES are rejected. If the priority is not already
  FROZEN, the freeze is started with ctdb_start_freeze() and the request
  is parked as a waiter on the freeze handle. The reply is then sent
  later, by ctdb_freeze_waiter_destructor().

  NOTE(review): how *async_reply is set is not visible in this excerpt.
 */
315 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
317 struct ctdb_freeze_waiter *w;
320 priority = (uint32_t)c->srvid;
323 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
327 if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
328 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
332 if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
333 DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
334 /* we're already frozen */
338 if (ctdb_start_freeze(ctdb, priority) != 0) {
339 DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
343 /* add ourselves to list of waiters */
344 if (ctdb->freeze_handles[priority] == NULL) {
345 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
/* the waiter is a talloc child of the freeze handle; talloc() does not zero the memory */
349 w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
350 CTDB_NO_MEMORY(ctdb, w);
/* re-parent the request onto the waiter so it stays alive until the waiter is freed */
352 w->c = talloc_steal(w, c);
353 w->priority = priority;
355 talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
356 DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
358 /* we won't reply till later */
365 block until we are frozen, used during daemon startup
/*
  Freeze every priority level in turn (1..NUM_DB_PRIORITIES). For each
  level the freeze is started with ctdb_start_freeze() and the event loop
  is run until the level's freeze mode is no longer CTDB_FREEZE_PENDING.
 */
367 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
371 for (i=1; i<=NUM_DB_PRIORITIES; i++) {
372 if (ctdb_start_freeze(ctdb, i)) {
373 DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
377 /* block until frozen */
/* NOTE(review): the loop exits on any non-PENDING mode, not only FROZEN - confirm callers check the result */
378 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
379 event_loop_once(ctdb->ev);
/*
  Thaw one priority level: cancel a started freeze transaction on the
  databases in ctdb->db_list, then free the freeze handle stored for
  'priority' and clear its slot.
 */
387 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
/* NOTE(review): logged at DEBUG_ERR although thawing is not an error - confirm intent */
389 DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
391 /* cancel any pending transactions */
392 if (ctdb->freeze_transaction_started) {
393 struct ctdb_db_context *ctdb_db;
/* NOTE(review): unlike ctdb_freeze_handle_destructor(), no per-database priority test appears in this loop - confirm that cancelling on every database is intended */
395 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
396 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
397 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
398 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
401 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
404 ctdb->freeze_transaction_started = false;
407 /* this hack can be used to get a copy of the databases at the end of a recovery */
/* NOTE(review): "2>&1 > /dev/null" points stderr at the original stdout before stdout is redirected; "> /dev/null 2>&1" would silence both. Whether these system() calls are compiled in is not visible in this excerpt. */
408 system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
412 /* and this one for local testing */
413 system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
416 if (ctdb->freeze_handles[priority] != NULL) {
/* handles created by ctdb_freeze_lock() carry ctdb_freeze_handle_destructor, which runs on this free */
417 talloc_free(ctdb->freeze_handles[priority]);
418 ctdb->freeze_handles[priority] = NULL;
/*
  Thaw control. A priority above NUM_DB_PRIORITIES is rejected with a log
  message. Otherwise thaw_priority() is applied either to every level
  1..NUM_DB_PRIORITIES or to the given level only, and
  ctdb_call_resend_all() is called.

  NOTE(review): the condition choosing between "all levels" and "one
  level" is not visible in this excerpt - confirm (the range check above
  lets priority 0 through).
 */
425 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority)
428 if (priority > NUM_DB_PRIORITIES) {
429 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
435 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
436 thaw_priority(ctdb, i);
439 thaw_priority(ctdb, priority);
442 ctdb_call_resend_all(ctdb);
448 start a transaction on all databases - used for recovery
/*
  Start a tdb transaction on every database in ctdb->db_list (used for
  recovery). Every priority level must be CTDB_FREEZE_FROZEN. If a freeze
  transaction had already been started, the old transaction on each
  database is cancelled first; a failed cancel is logged but not fatal.
  Finally freeze_transaction_started is set and 'id' is stored as
  freeze_transaction_id.
 */
450 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
452 struct ctdb_db_context *ctdb_db;
455 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
456 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
457 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
462 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
/* TDB_NOLOCK is set only for the cancel/start calls and removed right after */
465 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
467 if (ctdb->freeze_transaction_started) {
468 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
469 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
471 /* not a fatal error */
475 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
477 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
480 DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
/* remember the id so commit / wipe requests can be matched against it */
486 ctdb->freeze_transaction_started = true;
487 ctdb->freeze_transaction_id = id;
493 cancel a transaction for all databases - used for recovery
/*
  Cancel the tdb transaction on every database in ctdb->db_list (used for
  recovery) and clear freeze_transaction_started. A failed cancel on a
  database is logged and otherwise ignored.
 */
495 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
497 struct ctdb_db_context *ctdb_db;
499 DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
501 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
/* TDB_NOLOCK is set only around the cancel call and removed again below */
502 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
504 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
505 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n", ctdb_db->db_name));
506 /* not a fatal error */
509 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
512 ctdb->freeze_transaction_started = false;
518 commit transactions on all databases
/*
  Commit the freeze transaction on every database in ctdb->db_list.

  Preconditions checked: every priority level is CTDB_FREEZE_FROZEN, a
  freeze transaction has been started, and 'id' equals
  ctdb->freeze_transaction_id. The node table is scanned for nodes whose
  flags are 0 while computing healthy_nodes, which is handed to
  ctdb_update_persistent_health() after each database's commit. After the
  commit loop, freeze_transaction_started and freeze_transaction_id are
  reset.

  The final loop cancels transactions on all databases and clears
  freeze_transaction_started; it matches the clean-up announced by the
  two failure messages in the commit loop.
 */
520 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
522 struct ctdb_db_context *ctdb_db;
524 int healthy_nodes = 0;
526 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
527 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
/* NOTE(review): message says "transaction_start" although this is the commit path - looks copied from ctdb_control_transaction_start() */
528 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
533 if (!ctdb->freeze_transaction_started) {
534 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
538 if (id != ctdb->freeze_transaction_id) {
539 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
543 DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
544 for (i=0; i < ctdb->num_nodes; i++) {
545 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
546 i, ctdb->nodes[i]->flags));
/* flags == 0: presumably a node with no fault flags set, counted as healthy - TODO confirm */
547 if (ctdb->nodes[i]->flags == 0) {
551 DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
553 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
/* TDB_NOLOCK is set only around the commit call and removed again below */
556 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
557 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
559 DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
563 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
565 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
567 DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
568 "Cancel all remaining transactions and resetting transaction_started to false.\n",
574 ctdb->freeze_transaction_started = false;
575 ctdb->freeze_transaction_id = 0;
580 /* cancel any pending transactions */
581 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
582 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
583 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
584 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
587 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
589 ctdb->freeze_transaction_started = false;
595 wipe a database - only possible when in a frozen transaction
/*
  Wipe one database; only possible inside a frozen freeze transaction.

  indata is read as a struct ctdb_control_wipe_database, from which db_id
  and transaction_id are used. The database is looked up with
  find_ctdb_db(); its priority level must be CTDB_FREEZE_FROZEN, a freeze
  transaction must have been started and the transaction ids must match.
  The tdb is then emptied with tdb_wipe_all(). For non-persistent
  databases the delete_queue tree is freed and re-created afterwards.
 */
597 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
/* NOTE(review): indata.dptr is dereferenced as a whole struct; no check of the incoming data size is visible here - confirm the caller validates it */
599 struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
600 struct ctdb_db_context *ctdb_db;
602 ctdb_db = find_ctdb_db(ctdb, w.db_id);
604 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
608 if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
/* NOTE(review): message says "transaction_start" although this is the wipe path - looks copied from ctdb_control_transaction_start() */
609 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
613 if (!ctdb->freeze_transaction_started) {
614 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
618 if (w.transaction_id != ctdb->freeze_transaction_id) {
/* NOTE(review): message says "in commit" although this is the wipe path - looks copied from ctdb_control_transaction_commit() */
619 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
623 if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
624 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
/* the log message below calls delete_queue "the vacuum tree"; it is rebuilt empty after the wipe */
629 if (!ctdb_db->persistent) {
630 talloc_free(ctdb_db->delete_queue);
631 ctdb_db->delete_queue = trbt_create(ctdb_db, 0);
632 if (ctdb_db->delete_queue == NULL) {
633 DEBUG(DEBUG_ERR, (__location__ " Failed to re-create "
634 "the vacuum tree.\n"));