Merge commit 'rusty/vacuum-fix-master'
[samba.git] / ctdb / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "lib/tevent/tevent.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
27 #include "db_wrap.h"
28
/*
  return true if the database name contains "notify" or "serverid".
  ctdb_lock_all_databases() locks such databases in its second pass,
  after all the other databases of the same priority.
 */
static bool later_db(const char *name)
{
	if (strstr(name, "notify") != NULL) {
		return true;
	}
	return strstr(name, "serverid") != NULL;
}
33
34 /*
35   lock all databases
36  */
37 static int ctdb_lock_all_databases(struct ctdb_context *ctdb, uint32_t priority)
38 {
39         struct ctdb_db_context *ctdb_db;
40         /* REMOVE later */
41         /* This double loop is for backward compatibility and deadlock
42            avoidance for old samba versions that not yet support
43            the set prio call.
44            This code shall be removed later
45         */
46         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
47                 if (ctdb_db->priority != priority) {
48                         continue;
49                 }
50                 if (later_db(ctdb_db->db_name)) {
51                         continue;
52                 }
53                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
54                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
55                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
56                         return -1;
57                 }
58         }
59         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
60                 if (ctdb_db->priority != priority) {
61                         continue;
62                 }
63                 if (!later_db(ctdb_db->db_name)) {
64                         continue;
65                 }
66                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
67                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
68                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
69                         return -1;
70                 }
71         }
72         return 0;
73 }
74
/*
  one entry in the list of control requests waiting for a freeze lock
  child to get the database locks
 */
struct ctdb_freeze_waiter {
	/* list linkage, maintained with DLIST_ADD/DLIST_REMOVE */
	struct ctdb_freeze_waiter *next;
	struct ctdb_freeze_waiter *prev;
	struct ctdb_context *ctdb;
	/* the control request; it is answered from the waiter destructor */
	struct ctdb_req_control *c;
	uint32_t priority;
	/* status sent in the reply; starts at -1 and is overwritten by the
	   lock handler once the child has reported */
	int32_t status;
};
86
/* a handle to a freeze lock child process */
struct ctdb_freeze_handle {
	struct ctdb_context *ctdb;
	/* database priority this handle freezes */
	uint32_t priority;
	/* pid of the forked lock child; it is sent SIGKILL by the handle
	   destructor */
	pid_t child;
	/* read end of the pipe on which the child reports its status */
	int fd;
	/* control requests to answer once the child has reported */
	struct ctdb_freeze_waiter *waiters;
};
95
96 /*
97   destroy a freeze handle
98  */     
99 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
100 {
101         struct ctdb_context *ctdb = h->ctdb;
102         struct ctdb_db_context *ctdb_db;
103
104         DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
105
106         /* cancel any pending transactions */
107         if (ctdb->freeze_transaction_started) {
108                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
109                         if (ctdb_db->priority != h->priority) {
110                                 continue;
111                         }
112                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
113                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
114                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
115                                          ctdb_db->db_name));
116                         }
117                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
118                 }
119                 ctdb->freeze_transaction_started = false;
120         }
121
122         ctdb->freeze_mode[h->priority]    = CTDB_FREEZE_NONE;
123         ctdb->freeze_handles[h->priority] = NULL;
124
125         kill(h->child, SIGKILL);
126         return 0;
127 }
128
129 /*
130   called when the child writes its status to us
131  */
132 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde, 
133                                        uint16_t flags, void *private_data)
134 {
135         struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
136         int32_t status;
137         struct ctdb_freeze_waiter *w;
138
139         if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
140                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
141                 talloc_free(h);
142                 return;
143         }
144
145         if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
146                 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
147                 status = -1;
148         }
149
150         if (status == -1) {
151                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
152                 /* we didn't get the locks - destroy the handle */
153                 talloc_free(h);
154                 return;
155         }
156
157         h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
158
159         /* notify the waiters */
160         if (h != h->ctdb->freeze_handles[h->priority]) {
161                 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
162         }
163         while ((w = h->waiters)) {
164                 w->status = status;
165                 DLIST_REMOVE(h->waiters, w);
166                 talloc_free(w);
167         }
168 }
169
170 /*
171   create a child which gets locks on all the open databases, then calls the callback telling the parent
172   that it is done
173  */
174 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, uint32_t priority)
175 {
176         struct ctdb_freeze_handle *h;
177         int fd[2];
178         struct fd_event *fde;
179
180         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
181         CTDB_NO_MEMORY_NULL(ctdb, h);
182
183         h->ctdb     = ctdb;
184         h->priority = priority;
185
186         if (pipe(fd) == -1) {
187                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
188                 talloc_free(h);
189                 return NULL;
190         }
191         
192         h->child = fork();
193         if (h->child == -1) {
194                 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
195                 talloc_free(h);
196                 return NULL;
197         }
198
199         if (h->child == 0) {
200                 int ret;
201
202                 /* in the child */
203                 close(fd[0]);
204
205                 ret = ctdb_lock_all_databases(ctdb, priority);
206                 if (ret != 0) {
207                         _exit(0);
208                 }
209
210                 ret = write(fd[1], &ret, sizeof(ret));
211                 if (ret != sizeof(ret)) {
212                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
213                         _exit(1);
214                 }
215
216                 while (1) {
217                         sleep(1);
218                         if (kill(ctdb->ctdbd_pid, 0) != 0) {
219                                 DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
220
221                                 _exit(0);
222                         }
223                 }
224         }
225
226         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
227
228         close(fd[1]);
229         set_close_on_exec(fd[0]);
230
231         h->fd = fd[0];
232
233
234         fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ,
235                            ctdb_freeze_lock_handler, h);
236         if (fde == NULL) {
237                 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
238                 close(fd[0]);
239                 talloc_free(h);
240                 return NULL;
241         }
242         tevent_fd_set_auto_close(fde);
243
244         return h;
245 }
246
247 /*
248   destroy a waiter for a freeze mode change
249  */
250 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
251 {
252         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
253         return 0;
254 }
255
256 /*
257   start the freeze process for a certain priority
258  */
259 int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
260 {
261         if (priority == 0) {
262                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
263                 priority = 1;
264         }
265
266         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
267                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
268                 return -1;
269         }
270
271         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
272                 /* we're already frozen */
273                 return 0;
274         }
275
276         /* Stop any vacuuming going on: we don't want to wait. */
277         ctdb_stop_vacuuming(ctdb);
278
279         /* if there isn't a freeze lock child then create one */
280         if (ctdb->freeze_handles[priority] == NULL) {
281                 ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
282                 CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
283                 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
284         }
285
286         return 0;
287 }
288
289 /*
290   freeze the databases
291  */
292 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
293 {
294         struct ctdb_freeze_waiter *w;
295         uint32_t priority;
296
297         priority = (uint32_t)c->srvid;
298
299         DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
300
301         if (priority == 0) {
302                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
303                 priority = 1;
304         }
305
306         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
307                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
308                 return -1;
309         }
310
311         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
312                 /* we're already frozen */
313                 return 0;
314         }
315
316         if (ctdb_start_freeze(ctdb, priority) != 0) {
317                 DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
318                 return -1;
319         }
320
321         /* add ourselves to list of waiters */
322         if (ctdb->freeze_handles[priority] == NULL) {
323                 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
324                 return -1;
325         }
326
327         w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
328         CTDB_NO_MEMORY(ctdb, w);
329         w->ctdb     = ctdb;
330         w->c        = talloc_steal(w, c);
331         w->priority = priority;
332         w->status   = -1;
333         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
334         DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
335
336         /* we won't reply till later */
337         *async_reply = True;
338         return 0;
339 }
340
341
342 /*
343   block until we are frozen, used during daemon startup
344  */
345 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
346 {
347         int i;
348
349         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
350                 if (ctdb_start_freeze(ctdb, i)) {
351                         DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
352                         continue;
353                 }
354
355                 /* block until frozen */
356                 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
357                         event_loop_once(ctdb->ev);
358                 }
359         }
360
361         return 0;
362 }
363
364
365 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
366 {
367         DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
368
369         /* cancel any pending transactions */
370         if (ctdb->freeze_transaction_started) {
371                 struct ctdb_db_context *ctdb_db;
372
373                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
374                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
375                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
376                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
377                                          ctdb_db->db_name));
378                         }
379                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
380                 }
381         }
382         ctdb->freeze_transaction_started = false;
383
384 #if 0
385         /* this hack can be used to get a copy of the databases at the end of a recovery */
386         system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
387 #endif
388
389 #if 0
390         /* and this one for local testing */
391         system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
392 #endif
393
394         if (ctdb->freeze_handles[priority] != NULL) {
395                 talloc_free(ctdb->freeze_handles[priority]);
396                 ctdb->freeze_handles[priority] = NULL;
397         }
398 }
399
400 /*
401   thaw the databases
402  */
403 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority)
404 {
405
406         if (priority > NUM_DB_PRIORITIES) {
407                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
408                 return -1;
409         }
410
411         if (priority == 0) {
412                 int i;
413                 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
414                         thaw_priority(ctdb, i);
415                 }
416         } else {
417                 thaw_priority(ctdb, priority);
418         }
419
420         ctdb_call_resend_all(ctdb);
421         return 0;
422 }
423
424
425 /*
426   start a transaction on all databases - used for recovery
427  */
428 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
429 {
430         struct ctdb_db_context *ctdb_db;
431         int i;
432
433         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
434                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
435                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
436                         return -1;
437                 }
438         }
439
440         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
441                 int ret;
442
443                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
444
445                 if (ctdb->freeze_transaction_started) {
446                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
447                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
448                                          ctdb_db->db_name));
449                                 /* not a fatal error */
450                         }
451                 }
452
453                 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
454
455                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
456
457                 if (ret != 0) {
458                         DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
459                                  ctdb_db->db_name));
460                         return -1;
461                 }
462         }
463
464         ctdb->freeze_transaction_started = true;
465         ctdb->freeze_transaction_id = id;
466
467         return 0;
468 }
469
470 /*
471   cancel a transaction for all databases - used for recovery
472  */
473 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
474 {
475         struct ctdb_db_context *ctdb_db;
476
477         DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
478
479         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
480                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
481
482                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
483                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",  ctdb_db->db_name));
484                         /* not a fatal error */
485                 }
486
487                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
488         }
489
490         ctdb->freeze_transaction_started = false;
491
492         return 0;
493 }
494
495 /*
496   commit transactions on all databases
497  */
498 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
499 {
500         struct ctdb_db_context *ctdb_db;
501         int i;
502         int healthy_nodes = 0;
503
504         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
505                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
506                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
507                         return -1;
508                 }
509         }
510
511         if (!ctdb->freeze_transaction_started) {
512                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
513                 return -1;
514         }
515
516         if (id != ctdb->freeze_transaction_id) {
517                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
518                 return -1;
519         }
520
521         DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
522         for (i=0; i < ctdb->num_nodes; i++) {
523                 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
524                                    i, ctdb->nodes[i]->flags));
525                 if (ctdb->nodes[i]->flags == 0) {
526                         healthy_nodes++;
527                 }
528         }
529         DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
530
531         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
532                 int ret;
533
534                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
535                 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
536                 if (ret != 0) {
537                         DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
538                                  ctdb_db->db_name));
539                         goto fail;
540                 }
541                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
542
543                 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
544                 if (ret != 0) {
545                         DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
546                                          "Cancel all remaining transactions and resetting transaction_started to false.\n",
547                                          ctdb_db->db_name));
548                         goto fail;
549                 }
550         }
551
552         ctdb->freeze_transaction_started = false;
553         ctdb->freeze_transaction_id = 0;
554
555         return 0;
556
557 fail:
558         /* cancel any pending transactions */
559         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
560                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
561                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
562                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
563                                  ctdb_db->db_name));
564                 }
565                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
566         }
567         ctdb->freeze_transaction_started = false;
568
569         return -1;
570 }
571
572 /*
573   wipe a database - only possible when in a frozen transaction
574  */
575 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
576 {
577         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
578         struct ctdb_db_context *ctdb_db;
579
580         ctdb_db = find_ctdb_db(ctdb, w.db_id);
581         if (!ctdb_db) {
582                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
583                 return -1;
584         }
585
586         if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
587                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
588                 return -1;
589         }
590
591         if (!ctdb->freeze_transaction_started) {
592                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
593                 return -1;
594         }
595
596         if (w.transaction_id != ctdb->freeze_transaction_id) {
597                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
598                 return -1;
599         }
600
601         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
602                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
603                          ctdb_db->db_name));
604                 return -1;
605         }
606
607         return 0;
608 }