When wiping a database, clear the delete_queue.
[ctdb.git] / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "lib/tevent/tevent.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
27 #include "db_wrap.h"
28 #include "../common/rb_tree.h"
29
/*
  tell whether a database belongs to the second locking pass:
  true when its name contains "notify" or "serverid"
 */
static bool later_db(const char *name)
{
        if (strstr(name, "notify") != NULL) {
                return true;
        }
        return strstr(name, "serverid") != NULL;
}
34
35 /*
36   lock all databases
37  */
38 static int ctdb_lock_all_databases(struct ctdb_context *ctdb, uint32_t priority)
39 {
40         struct ctdb_db_context *ctdb_db;
41         /* REMOVE later */
42         /* This double loop is for backward compatibility and deadlock
43            avoidance for old samba versions that not yet support
44            the set prio call.
45            This code shall be removed later
46         */
47         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
48                 if (ctdb_db->priority != priority) {
49                         continue;
50                 }
51                 if (later_db(ctdb_db->db_name)) {
52                         continue;
53                 }
54                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
55                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
56                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
57                         return -1;
58                 }
59         }
60         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
61                 if (ctdb_db->priority != priority) {
62                         continue;
63                 }
64                 if (!later_db(ctdb_db->db_name)) {
65                         continue;
66                 }
67                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
68                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
69                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
70                         return -1;
71                 }
72         }
73         return 0;
74 }
75
/*
  one entry in the list of control requests that are waiting for a
  freeze lock child to get the database locks
 */
struct ctdb_freeze_waiter {
        /* doubly linked list pointers (list head is the handle's waiters) */
        struct ctdb_freeze_waiter *next;
        struct ctdb_freeze_waiter *prev;
        /* daemon context used when sending the reply */
        struct ctdb_context *ctdb;
        /* the control request that is waiting for its reply */
        struct ctdb_req_control *c;
        /* database priority this waiter is queued for */
        uint32_t priority;
        /* status sent back in the control reply when the waiter is destroyed */
        int32_t status;
};
87
/*
  a handle to a freeze lock child process
 */
struct ctdb_freeze_handle {
        /* daemon context that owns this handle */
        struct ctdb_context *ctdb;
        /* database priority the child is locking */
        uint32_t priority;
        /* pid of the forked lock child */
        pid_t child;
        /* read end of the pipe on which the child reports its status */
        int fd;
        /* control requests waiting for the locks to be taken */
        struct ctdb_freeze_waiter *waiters;
};
96
97 /*
98   destroy a freeze handle
99  */     
100 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
101 {
102         struct ctdb_context *ctdb = h->ctdb;
103         struct ctdb_db_context *ctdb_db;
104
105         DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
106
107         /* cancel any pending transactions */
108         if (ctdb->freeze_transaction_started) {
109                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
110                         if (ctdb_db->priority != h->priority) {
111                                 continue;
112                         }
113                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
114                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
115                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
116                                          ctdb_db->db_name));
117                         }
118                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
119                 }
120                 ctdb->freeze_transaction_started = false;
121         }
122
123         ctdb->freeze_mode[h->priority]    = CTDB_FREEZE_NONE;
124         ctdb->freeze_handles[h->priority] = NULL;
125
126         kill(h->child, SIGKILL);
127         return 0;
128 }
129
130 /*
131   called when the child writes its status to us
132  */
133 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde, 
134                                        uint16_t flags, void *private_data)
135 {
136         struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
137         int32_t status;
138         struct ctdb_freeze_waiter *w;
139
140         if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
141                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
142                 talloc_free(h);
143                 return;
144         }
145
146         if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
147                 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
148                 status = -1;
149         }
150
151         if (status == -1) {
152                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
153                 /* we didn't get the locks - destroy the handle */
154                 talloc_free(h);
155                 return;
156         }
157
158         h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
159
160         /* notify the waiters */
161         if (h != h->ctdb->freeze_handles[h->priority]) {
162                 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
163         }
164         while ((w = h->waiters)) {
165                 w->status = status;
166                 DLIST_REMOVE(h->waiters, w);
167                 talloc_free(w);
168         }
169 }
170
171 /*
172   create a child which gets locks on all the open databases, then calls the callback telling the parent
173   that it is done
174  */
175 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, uint32_t priority)
176 {
177         struct ctdb_freeze_handle *h;
178         int fd[2];
179         struct fd_event *fde;
180
181         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
182         CTDB_NO_MEMORY_NULL(ctdb, h);
183
184         h->ctdb     = ctdb;
185         h->priority = priority;
186
187         if (pipe(fd) == -1) {
188                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
189                 talloc_free(h);
190                 return NULL;
191         }
192         
193         h->child = fork();
194         if (h->child == -1) {
195                 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
196                 talloc_free(h);
197                 return NULL;
198         }
199
200         if (h->child == 0) {
201                 int ret;
202
203                 /* in the child */
204                 close(fd[0]);
205
206                 debug_extra = talloc_asprintf(NULL, "freeze_lock-%u:", priority);
207                 ret = ctdb_lock_all_databases(ctdb, priority);
208                 if (ret != 0) {
209                         _exit(0);
210                 }
211
212                 ret = write(fd[1], &ret, sizeof(ret));
213                 if (ret != sizeof(ret)) {
214                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
215                         _exit(1);
216                 }
217
218                 while (1) {
219                         sleep(1);
220                         if (kill(ctdb->ctdbd_pid, 0) != 0) {
221                                 DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
222
223                                 _exit(0);
224                         }
225                 }
226         }
227
228         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
229
230         close(fd[1]);
231         set_close_on_exec(fd[0]);
232
233         h->fd = fd[0];
234
235
236         fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ,
237                            ctdb_freeze_lock_handler, h);
238         if (fde == NULL) {
239                 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
240                 close(fd[0]);
241                 talloc_free(h);
242                 return NULL;
243         }
244         tevent_fd_set_auto_close(fde);
245
246         return h;
247 }
248
249 /*
250   destroy a waiter for a freeze mode change
251  */
252 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
253 {
254         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
255         return 0;
256 }
257
258 /*
259   start the freeze process for a certain priority
260  */
261 int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
262 {
263         if (priority == 0) {
264                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
265                 priority = 1;
266         }
267
268         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
269                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
270                 return -1;
271         }
272
273         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
274                 /* we're already frozen */
275                 return 0;
276         }
277
278         /* Stop any vacuuming going on: we don't want to wait. */
279         ctdb_stop_vacuuming(ctdb);
280
281         /* if there isn't a freeze lock child then create one */
282         if (ctdb->freeze_handles[priority] == NULL) {
283                 ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
284                 CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
285                 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
286         }
287
288         return 0;
289 }
290
291 /*
292   freeze the databases
293  */
294 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
295 {
296         struct ctdb_freeze_waiter *w;
297         uint32_t priority;
298
299         priority = (uint32_t)c->srvid;
300
301         DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
302
303         if (priority == 0) {
304                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
305                 priority = 1;
306         }
307
308         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
309                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
310                 return -1;
311         }
312
313         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
314                 /* we're already frozen */
315                 return 0;
316         }
317
318         if (ctdb_start_freeze(ctdb, priority) != 0) {
319                 DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
320                 return -1;
321         }
322
323         /* add ourselves to list of waiters */
324         if (ctdb->freeze_handles[priority] == NULL) {
325                 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
326                 return -1;
327         }
328
329         w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
330         CTDB_NO_MEMORY(ctdb, w);
331         w->ctdb     = ctdb;
332         w->c        = talloc_steal(w, c);
333         w->priority = priority;
334         w->status   = -1;
335         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
336         DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
337
338         /* we won't reply till later */
339         *async_reply = True;
340         return 0;
341 }
342
343
344 /*
345   block until we are frozen, used during daemon startup
346  */
347 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
348 {
349         int i;
350
351         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
352                 if (ctdb_start_freeze(ctdb, i)) {
353                         DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
354                         continue;
355                 }
356
357                 /* block until frozen */
358                 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
359                         event_loop_once(ctdb->ev);
360                 }
361         }
362
363         return 0;
364 }
365
366
367 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
368 {
369         DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
370
371         /* cancel any pending transactions */
372         if (ctdb->freeze_transaction_started) {
373                 struct ctdb_db_context *ctdb_db;
374
375                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
376                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
377                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
378                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
379                                          ctdb_db->db_name));
380                         }
381                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
382                 }
383         }
384         ctdb->freeze_transaction_started = false;
385
386 #if 0
387         /* this hack can be used to get a copy of the databases at the end of a recovery */
388         system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
389 #endif
390
391 #if 0
392         /* and this one for local testing */
393         system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
394 #endif
395
396         if (ctdb->freeze_handles[priority] != NULL) {
397                 talloc_free(ctdb->freeze_handles[priority]);
398                 ctdb->freeze_handles[priority] = NULL;
399         }
400 }
401
402 /*
403   thaw the databases
404  */
405 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority)
406 {
407
408         if (priority > NUM_DB_PRIORITIES) {
409                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
410                 return -1;
411         }
412
413         if (priority == 0) {
414                 int i;
415                 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
416                         thaw_priority(ctdb, i);
417                 }
418         } else {
419                 thaw_priority(ctdb, priority);
420         }
421
422         ctdb_call_resend_all(ctdb);
423         return 0;
424 }
425
426
427 /*
428   start a transaction on all databases - used for recovery
429  */
430 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
431 {
432         struct ctdb_db_context *ctdb_db;
433         int i;
434
435         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
436                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
437                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
438                         return -1;
439                 }
440         }
441
442         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
443                 int ret;
444
445                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
446
447                 if (ctdb->freeze_transaction_started) {
448                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
449                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
450                                          ctdb_db->db_name));
451                                 /* not a fatal error */
452                         }
453                 }
454
455                 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
456
457                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
458
459                 if (ret != 0) {
460                         DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
461                                  ctdb_db->db_name));
462                         return -1;
463                 }
464         }
465
466         ctdb->freeze_transaction_started = true;
467         ctdb->freeze_transaction_id = id;
468
469         return 0;
470 }
471
472 /*
473   cancel a transaction for all databases - used for recovery
474  */
475 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
476 {
477         struct ctdb_db_context *ctdb_db;
478
479         DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
480
481         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
482                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
483
484                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
485                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",  ctdb_db->db_name));
486                         /* not a fatal error */
487                 }
488
489                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
490         }
491
492         ctdb->freeze_transaction_started = false;
493
494         return 0;
495 }
496
497 /*
498   commit transactions on all databases
499  */
500 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
501 {
502         struct ctdb_db_context *ctdb_db;
503         int i;
504         int healthy_nodes = 0;
505
506         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
507                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
508                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
509                         return -1;
510                 }
511         }
512
513         if (!ctdb->freeze_transaction_started) {
514                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
515                 return -1;
516         }
517
518         if (id != ctdb->freeze_transaction_id) {
519                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
520                 return -1;
521         }
522
523         DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
524         for (i=0; i < ctdb->num_nodes; i++) {
525                 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
526                                    i, ctdb->nodes[i]->flags));
527                 if (ctdb->nodes[i]->flags == 0) {
528                         healthy_nodes++;
529                 }
530         }
531         DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
532
533         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
534                 int ret;
535
536                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
537                 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
538                 if (ret != 0) {
539                         DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
540                                  ctdb_db->db_name));
541                         goto fail;
542                 }
543                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
544
545                 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
546                 if (ret != 0) {
547                         DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
548                                          "Cancel all remaining transactions and resetting transaction_started to false.\n",
549                                          ctdb_db->db_name));
550                         goto fail;
551                 }
552         }
553
554         ctdb->freeze_transaction_started = false;
555         ctdb->freeze_transaction_id = 0;
556
557         return 0;
558
559 fail:
560         /* cancel any pending transactions */
561         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
562                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
563                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
564                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
565                                  ctdb_db->db_name));
566                 }
567                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
568         }
569         ctdb->freeze_transaction_started = false;
570
571         return -1;
572 }
573
574 /*
575   wipe a database - only possible when in a frozen transaction
576  */
577 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
578 {
579         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
580         struct ctdb_db_context *ctdb_db;
581
582         ctdb_db = find_ctdb_db(ctdb, w.db_id);
583         if (!ctdb_db) {
584                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
585                 return -1;
586         }
587
588         if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
589                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
590                 return -1;
591         }
592
593         if (!ctdb->freeze_transaction_started) {
594                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
595                 return -1;
596         }
597
598         if (w.transaction_id != ctdb->freeze_transaction_id) {
599                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
600                 return -1;
601         }
602
603         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
604                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
605                          ctdb_db->db_name));
606                 return -1;
607         }
608
609         if (!ctdb_db->persistent) {
610                 talloc_free(ctdb_db->delete_queue);
611                 ctdb_db->delete_queue = trbt_create(ctdb_db, 0);
612                 if (ctdb_db->delete_queue == NULL) {
613                         DEBUG(DEBUG_ERR, (__location__ " Failed to re-create "
614                                           "the vacuum tree.\n"));
615                         return -1;
616                 }
617         }
618
619         return 0;
620 }