b209b0e0d1b22e1eabd79cfcfd01e23f029ba31c
[ctdb.git] / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "lib/tevent/tevent.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
27 #include "db_wrap.h"
28 #include "../common/rb_tree.h"
29
/*
  Return true for databases whose name contains "notify" or "serverid".
  ctdb_lock_all_databases() locks these after all other databases of the
  same priority.
 */
static bool later_db(const char *name)
{
        if (strstr(name, "notify") != NULL) {
                return true;
        }
        return strstr(name, "serverid") != NULL;
}
34
35 /*
36   lock all databases
37  */
38 static int ctdb_lock_all_databases(struct ctdb_context *ctdb, uint32_t priority)
39 {
40         struct ctdb_db_context *ctdb_db;
41         /* REMOVE later */
42         /* This double loop is for backward compatibility and deadlock
43            avoidance for old samba versions that not yet support
44            the set prio call.
45            This code shall be removed later
46         */
47         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
48                 if (ctdb_db->priority != priority) {
49                         continue;
50                 }
51                 if (later_db(ctdb_db->db_name)) {
52                         continue;
53                 }
54                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
55                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
56                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
57                         return -1;
58                 }
59         }
60         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
61                 if (ctdb_db->priority != priority) {
62                         continue;
63                 }
64                 if (!later_db(ctdb_db->db_name)) {
65                         continue;
66                 }
67                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
68                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
69                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
70                         return -1;
71                 }
72         }
73         return 0;
74 }
75
/*
  One entry in the list of control requests that are waiting for a
  freeze lock child to get the database locks.
 */
struct ctdb_freeze_waiter {
        /* list links, maintained with DLIST_ADD / DLIST_REMOVE */
        struct ctdb_freeze_waiter *next;
        struct ctdb_freeze_waiter *prev;

        struct ctdb_context *ctdb;

        /* the control request that gets answered when the waiter is freed */
        struct ctdb_req_control *c;

        uint32_t priority;

        /* status sent in the reply; initialised to -1 by ctdb_control_freeze() */
        int32_t status;
};
87
/*
  a handle to a freeze lock child process
 */
struct ctdb_freeze_handle {
        struct ctdb_context *ctdb;

        /* database priority this handle freezes */
        uint32_t priority;

        /* pid of the forked child that takes the database locks */
        pid_t child;

        /* read end of the pipe on which the child reports its status */
        int fd;

        /* control requests waiting for the freeze to complete */
        struct ctdb_freeze_waiter *waiters;
};
96
97 /*
98   destroy a freeze handle
99  */     
100 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
101 {
102         struct ctdb_context *ctdb = h->ctdb;
103         struct ctdb_db_context *ctdb_db;
104
105         DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
106
107         /* cancel any pending transactions */
108         if (ctdb->freeze_transaction_started) {
109                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
110                         if (ctdb_db->priority != h->priority) {
111                                 continue;
112                         }
113                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
114                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
115                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
116                                          ctdb_db->db_name));
117                         }
118                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
119                 }
120                 ctdb->freeze_transaction_started = false;
121         }
122
123         ctdb->freeze_mode[h->priority]    = CTDB_FREEZE_NONE;
124         ctdb->freeze_handles[h->priority] = NULL;
125
126         kill(h->child, SIGKILL);
127         return 0;
128 }
129
130 /*
131   called when the child writes its status to us
132  */
133 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde, 
134                                        uint16_t flags, void *private_data)
135 {
136         struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
137         int32_t status;
138         struct ctdb_freeze_waiter *w;
139
140         if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
141                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
142                 talloc_free(h);
143                 return;
144         }
145
146         if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
147                 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
148                 status = -1;
149         }
150
151         if (status == -1) {
152                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
153                 /* we didn't get the locks - destroy the handle */
154                 talloc_free(h);
155                 return;
156         }
157
158         h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
159
160         /* notify the waiters */
161         if (h != h->ctdb->freeze_handles[h->priority]) {
162                 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
163         }
164         while ((w = h->waiters)) {
165                 w->status = status;
166                 DLIST_REMOVE(h->waiters, w);
167                 talloc_free(w);
168         }
169 }
170
171 /*
172   create a child which gets locks on all the open databases, then calls the callback telling the parent
173   that it is done
174  */
175 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, uint32_t priority)
176 {
177         struct ctdb_freeze_handle *h;
178         int fd[2];
179         struct fd_event *fde;
180
181         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
182         CTDB_NO_MEMORY_NULL(ctdb, h);
183
184         h->ctdb     = ctdb;
185         h->priority = priority;
186
187         if (pipe(fd) == -1) {
188                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
189                 talloc_free(h);
190                 return NULL;
191         }
192         
193         h->child = ctdb_fork(ctdb);
194         if (h->child == -1) {
195                 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
196                 talloc_free(h);
197                 return NULL;
198         }
199
200         if (h->child == 0) {
201                 int ret;
202
203                 /* in the child */
204                 close(fd[0]);
205
206                 debug_extra = talloc_asprintf(NULL, "freeze_lock-%u:", priority);
207                 ret = ctdb_lock_all_databases(ctdb, priority);
208                 if (ret != 0) {
209                         _exit(0);
210                 }
211
212                 ret = write(fd[1], &ret, sizeof(ret));
213                 if (ret != sizeof(ret)) {
214                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
215                         _exit(1);
216                 }
217
218                 while (1) {
219                         sleep(1);
220                         if (kill(ctdb->ctdbd_pid, 0) != 0) {
221                                 DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
222
223                                 _exit(0);
224                         }
225                 }
226         }
227
228         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
229
230         close(fd[1]);
231         set_close_on_exec(fd[0]);
232
233         h->fd = fd[0];
234
235
236         fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ,
237                            ctdb_freeze_lock_handler, h);
238         if (fde == NULL) {
239                 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
240                 close(fd[0]);
241                 talloc_free(h);
242                 return NULL;
243         }
244         tevent_fd_set_auto_close(fde);
245
246         return h;
247 }
248
249 /*
250   destroy a waiter for a freeze mode change
251  */
252 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
253 {
254         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
255         return 0;
256 }
257
/*
 * Run an external script to check if there is a deadlock situation
 *
 * The program to run is taken from the CTDB_DEBUG_LOCKS environment
 * variable; nothing happens when it is unset.  The program is started
 * in a forked child and is not waited for here.  This is best-effort:
 * a failed fork() is silently ignored.
 */
static void ctdb_debug_locks(void)
{
        const char *cmd = getenv("CTDB_DEBUG_LOCKS");
        pid_t pid;

        if (cmd == NULL) {
                return;
        }

        pid = fork();
        if (pid != 0) {
                /* parent (or fork failure): nothing more to do */
                return;
        }

        /* Execute only in child process */
        execl(cmd, cmd, (char *)NULL);

        /* execl() only returns on failure.  The child must not fall
           through and carry on running as a second copy of the caller. */
        _exit(127);
}
277
278 /*
279   start the freeze process for a certain priority
280  */
281 int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
282 {
283         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
284                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
285                 ctdb_fatal(ctdb, "Internal error");
286         }
287
288         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
289                 /* we're already frozen */
290                 return 0;
291         }
292
293         DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
294
295         /* Stop any vacuuming going on: we don't want to wait. */
296         ctdb_stop_vacuuming(ctdb);
297
298         /* if there isn't a freeze lock child then create one */
299         if (ctdb->freeze_handles[priority] == NULL) {
300                 ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
301                 CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
302                 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
303         } else {
304                 /* The previous free lock child has not yet been able to get locks.
305                  * Invoke debugging script */
306                 ctdb_debug_locks();
307         }
308
309         return 0;
310 }
311
312 /*
313   freeze the databases
314  */
315 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
316 {
317         struct ctdb_freeze_waiter *w;
318         uint32_t priority;
319
320         priority = (uint32_t)c->srvid;
321
322         if (priority == 0) {
323                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
324                 priority = 1;
325         }
326
327         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
328                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
329                 return -1;
330         }
331
332         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
333                 DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
334                 /* we're already frozen */
335                 return 0;
336         }
337
338         if (ctdb_start_freeze(ctdb, priority) != 0) {
339                 DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
340                 return -1;
341         }
342
343         /* add ourselves to list of waiters */
344         if (ctdb->freeze_handles[priority] == NULL) {
345                 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
346                 return -1;
347         }
348
349         w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
350         CTDB_NO_MEMORY(ctdb, w);
351         w->ctdb     = ctdb;
352         w->c        = talloc_steal(w, c);
353         w->priority = priority;
354         w->status   = -1;
355         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
356         DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
357
358         /* we won't reply till later */
359         *async_reply = True;
360         return 0;
361 }
362
363
364 /*
365   block until we are frozen, used during daemon startup
366  */
367 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
368 {
369         int i;
370
371         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
372                 if (ctdb_start_freeze(ctdb, i)) {
373                         DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
374                         continue;
375                 }
376
377                 /* block until frozen */
378                 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
379                         event_loop_once(ctdb->ev);
380                 }
381         }
382
383         return 0;
384 }
385
386
387 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
388 {
389         DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
390
391         /* cancel any pending transactions */
392         if (ctdb->freeze_transaction_started) {
393                 struct ctdb_db_context *ctdb_db;
394
395                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
396                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
397                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
398                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
399                                          ctdb_db->db_name));
400                         }
401                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
402                 }
403         }
404         ctdb->freeze_transaction_started = false;
405
406 #if 0
407         /* this hack can be used to get a copy of the databases at the end of a recovery */
408         system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
409 #endif
410
411 #if 0
412         /* and this one for local testing */
413         system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
414 #endif
415
416         if (ctdb->freeze_handles[priority] != NULL) {
417                 talloc_free(ctdb->freeze_handles[priority]);
418                 ctdb->freeze_handles[priority] = NULL;
419         }
420 }
421
422 /*
423   thaw the databases
424  */
425 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority)
426 {
427
428         if (priority > NUM_DB_PRIORITIES) {
429                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
430                 return -1;
431         }
432
433         if (priority == 0) {
434                 int i;
435                 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
436                         thaw_priority(ctdb, i);
437                 }
438         } else {
439                 thaw_priority(ctdb, priority);
440         }
441
442         ctdb_call_resend_all(ctdb);
443         return 0;
444 }
445
446
447 /*
448   start a transaction on all databases - used for recovery
449  */
450 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
451 {
452         struct ctdb_db_context *ctdb_db;
453         int i;
454
455         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
456                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
457                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
458                         return -1;
459                 }
460         }
461
462         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
463                 int ret;
464
465                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
466
467                 if (ctdb->freeze_transaction_started) {
468                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
469                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
470                                          ctdb_db->db_name));
471                                 /* not a fatal error */
472                         }
473                 }
474
475                 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
476
477                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
478
479                 if (ret != 0) {
480                         DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
481                                  ctdb_db->db_name));
482                         return -1;
483                 }
484         }
485
486         ctdb->freeze_transaction_started = true;
487         ctdb->freeze_transaction_id = id;
488
489         return 0;
490 }
491
492 /*
493   cancel a transaction for all databases - used for recovery
494  */
495 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
496 {
497         struct ctdb_db_context *ctdb_db;
498
499         DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
500
501         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
502                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
503
504                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
505                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",  ctdb_db->db_name));
506                         /* not a fatal error */
507                 }
508
509                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
510         }
511
512         ctdb->freeze_transaction_started = false;
513
514         return 0;
515 }
516
517 /*
518   commit transactions on all databases
519  */
520 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
521 {
522         struct ctdb_db_context *ctdb_db;
523         int i;
524         int healthy_nodes = 0;
525
526         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
527                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
528                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
529                         return -1;
530                 }
531         }
532
533         if (!ctdb->freeze_transaction_started) {
534                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
535                 return -1;
536         }
537
538         if (id != ctdb->freeze_transaction_id) {
539                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
540                 return -1;
541         }
542
543         DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
544         for (i=0; i < ctdb->num_nodes; i++) {
545                 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
546                                    i, ctdb->nodes[i]->flags));
547                 if (ctdb->nodes[i]->flags == 0) {
548                         healthy_nodes++;
549                 }
550         }
551         DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
552
553         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
554                 int ret;
555
556                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
557                 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
558                 if (ret != 0) {
559                         DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
560                                  ctdb_db->db_name));
561                         goto fail;
562                 }
563                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
564
565                 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
566                 if (ret != 0) {
567                         DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
568                                          "Cancel all remaining transactions and resetting transaction_started to false.\n",
569                                          ctdb_db->db_name));
570                         goto fail;
571                 }
572         }
573
574         ctdb->freeze_transaction_started = false;
575         ctdb->freeze_transaction_id = 0;
576
577         return 0;
578
579 fail:
580         /* cancel any pending transactions */
581         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
582                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
583                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
584                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
585                                  ctdb_db->db_name));
586                 }
587                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
588         }
589         ctdb->freeze_transaction_started = false;
590
591         return -1;
592 }
593
594 /*
595   wipe a database - only possible when in a frozen transaction
596  */
597 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
598 {
599         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
600         struct ctdb_db_context *ctdb_db;
601
602         ctdb_db = find_ctdb_db(ctdb, w.db_id);
603         if (!ctdb_db) {
604                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
605                 return -1;
606         }
607
608         if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
609                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
610                 return -1;
611         }
612
613         if (!ctdb->freeze_transaction_started) {
614                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
615                 return -1;
616         }
617
618         if (w.transaction_id != ctdb->freeze_transaction_id) {
619                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
620                 return -1;
621         }
622
623         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
624                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
625                          ctdb_db->db_name));
626                 return -1;
627         }
628
629         if (!ctdb_db->persistent) {
630                 talloc_free(ctdb_db->delete_queue);
631                 ctdb_db->delete_queue = trbt_create(ctdb_db, 0);
632                 if (ctdb_db->delete_queue == NULL) {
633                         DEBUG(DEBUG_ERR, (__location__ " Failed to re-create "
634                                           "the vacuum tree.\n"));
635                         return -1;
636                 }
637         }
638
639         return 0;
640 }