70333b0b33ddf11f6e6afdf56f792686e58f8b82
[metze/ctdb/wip.git] / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "lib/events/events.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
27 #include "db_wrap.h"
28
29
30 /*
31   lock all databases
32  */
33 static int ctdb_lock_all_databases(struct ctdb_context *ctdb, uint32_t priority)
34 {
35         struct ctdb_db_context *ctdb_db;
36         /* REMOVE later */
37         /* This double loop is for backward compatibility and deadlock
38            avoidance for old samba versions that not yet support
39            the set prio call.
40            This code shall be removed later
41         */
42         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
43                 if (ctdb_db->priority != priority) {
44                         continue;
45                 }
46                 if (strstr(ctdb_db->db_name, "notify") != NULL) {
47                         continue;
48                 }
49                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
50                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
51                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
52                         return -1;
53                 }
54         }
55         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
56                 if (ctdb_db->priority != priority) {
57                         continue;
58                 }
59                 if (strstr(ctdb_db->db_name, "notify") == NULL) {
60                         continue;
61                 }
62                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
63                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
64                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
65                         return -1;
66                 }
67         }
68         return 0;
69 }
70
/*
  One entry in the list of control requests that are waiting for a
  freeze lock child to obtain the database locks.
 */
struct ctdb_freeze_waiter {
        /* linkage for the waiter list kept on the freeze handle */
        struct ctdb_freeze_waiter *next;
        struct ctdb_freeze_waiter *prev;
        /* context handed to ctdb_request_control_reply() */
        struct ctdb_context *ctdb;
        /* the control request that gets answered when the waiter is freed */
        struct ctdb_req_control *c;
        /* database priority the request asked to freeze */
        uint32_t priority;
        /* status value sent back in the reply */
        int32_t status;
};
82
/*
  Handle for one freeze lock child process; there is at most one per
  database priority (see ctdb->freeze_handles[]).
 */
struct ctdb_freeze_handle {
        /* owning ctdb context */
        struct ctdb_context *ctdb;
        /* database priority this child locks */
        uint32_t priority;
        /* pid of the forked child that holds the locks */
        pid_t child;
        /* read end of the pipe on which the child reports its status */
        int fd;
        /* control requests waiting for the outcome of this freeze */
        struct ctdb_freeze_waiter *waiters;
};
91
92 /*
93   destroy a freeze handle
94  */     
95 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
96 {
97         struct ctdb_context *ctdb = h->ctdb;
98         struct ctdb_db_context *ctdb_db;
99
100         DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
101
102         /* cancel any pending transactions */
103         if (ctdb->freeze_transaction_started) {
104                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
105                         if (ctdb_db->priority != h->priority) {
106                                 continue;
107                         }
108                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
109                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
110                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
111                                          ctdb_db->db_name));
112                         }
113                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
114                 }
115                 ctdb->freeze_transaction_started = false;
116         }
117
118         ctdb->freeze_mode[h->priority]    = CTDB_FREEZE_NONE;
119         ctdb->freeze_handles[h->priority] = NULL;
120
121         kill(h->child, SIGKILL);
122         return 0;
123 }
124
125 /*
126   called when the child writes its status to us
127  */
128 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde, 
129                                        uint16_t flags, void *private_data)
130 {
131         struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
132         int32_t status;
133         struct ctdb_freeze_waiter *w;
134
135         if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
136                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
137                 talloc_free(h);
138                 return;
139         }
140
141         if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
142                 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
143                 status = -1;
144         }
145
146         if (status == -1) {
147                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
148                 /* we didn't get the locks - destroy the handle */
149                 talloc_free(h);
150                 return;
151         }
152
153         h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
154
155         /* notify the waiters */
156         if (h != h->ctdb->freeze_handles[h->priority]) {
157                 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
158         }
159         while ((w = h->waiters)) {
160                 w->status = status;
161                 DLIST_REMOVE(h->waiters, w);
162                 talloc_free(w);
163         }
164 }
165
166 /*
167   create a child which gets locks on all the open databases, then calls the callback telling the parent
168   that it is done
169  */
170 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, uint32_t priority)
171 {
172         struct ctdb_freeze_handle *h;
173         int fd[2];
174         struct fd_event *fde;
175
176         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
177         CTDB_NO_MEMORY_NULL(ctdb, h);
178
179         h->ctdb     = ctdb;
180         h->priority = priority;
181
182         if (pipe(fd) == -1) {
183                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
184                 talloc_free(h);
185                 return NULL;
186         }
187         
188         h->child = fork();
189         if (h->child == -1) {
190                 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
191                 talloc_free(h);
192                 return NULL;
193         }
194
195         if (h->child == 0) {
196                 int ret;
197
198                 /* in the child */
199                 close(fd[0]);
200
201                 ret = ctdb_lock_all_databases(ctdb, priority);
202                 if (ret != 0) {
203                         _exit(0);
204                 }
205
206                 ret = write(fd[1], &ret, sizeof(ret));
207                 if (ret != sizeof(ret)) {
208                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
209                         _exit(1);
210                 }
211
212                 while (1) {
213                         sleep(1);
214                         if (kill(ctdb->ctdbd_pid, 0) != 0) {
215                                 DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
216
217                                 _exit(0);
218                         }
219                 }
220         }
221
222         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
223
224         close(fd[1]);
225         set_close_on_exec(fd[0]);
226
227         h->fd = fd[0];
228
229
230         fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE, 
231                            ctdb_freeze_lock_handler, h);
232         if (fde == NULL) {
233                 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
234                 close(fd[0]);
235                 talloc_free(h);
236                 return NULL;
237         }
238
239         return h;
240 }
241
242 /*
243   destroy a waiter for a freeze mode change
244  */
245 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
246 {
247         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
248         return 0;
249 }
250
251 /*
252   start the freeze process for a certain priority
253  */
254 int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
255 {
256         if (priority == 0) {
257                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
258                 priority = 1;
259         }
260
261         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
262                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
263                 return -1;
264         }
265
266         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
267                 /* we're already frozen */
268                 return 0;
269         }
270
271         /* if there isn't a freeze lock child then create one */
272         if (ctdb->freeze_handles[priority] == NULL) {
273                 ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
274                 CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
275                 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
276         }
277
278         return 0;
279 }
280
281 /*
282   freeze the databases
283  */
284 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
285 {
286         struct ctdb_freeze_waiter *w;
287         uint32_t priority;
288
289         priority = (uint32_t)c->srvid;
290
291         DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
292
293         if (priority == 0) {
294                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
295                 priority = 1;
296         }
297
298         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
299                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
300                 return -1;
301         }
302
303         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
304                 /* we're already frozen */
305                 return 0;
306         }
307
308         if (ctdb_start_freeze(ctdb, priority) != 0) {
309                 DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
310                 return -1;
311         }
312
313         /* add ourselves to list of waiters */
314         if (ctdb->freeze_handles[priority] == NULL) {
315                 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
316                 return -1;
317         }
318
319         w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
320         CTDB_NO_MEMORY(ctdb, w);
321         w->ctdb     = ctdb;
322         w->c        = talloc_steal(w, c);
323         w->priority = priority;
324         w->status   = -1;
325         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
326         DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
327
328         /* we won't reply till later */
329         *async_reply = True;
330         return 0;
331 }
332
333
334 /*
335   block until we are frozen, used during daemon startup
336  */
337 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
338 {
339         int i;
340
341         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
342                 if (ctdb_start_freeze(ctdb, i)) {
343                         DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
344                         continue;
345                 }
346
347                 /* block until frozen */
348                 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
349                         event_loop_once(ctdb->ev);
350                 }
351         }
352
353         return 0;
354 }
355
356
357 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
358 {
359         DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
360
361         /* cancel any pending transactions */
362         if (ctdb->freeze_transaction_started) {
363                 struct ctdb_db_context *ctdb_db;
364
365                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
366                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
367                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
368                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
369                                          ctdb_db->db_name));
370                         }
371                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
372                 }
373         }
374         ctdb->freeze_transaction_started = false;
375
376 #if 0
377         /* this hack can be used to get a copy of the databases at the end of a recovery */
378         system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
379 #endif
380
381 #if 0
382         /* and this one for local testing */
383         system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
384 #endif
385
386         if (ctdb->freeze_handles[priority] != NULL) {
387                 talloc_free(ctdb->freeze_handles[priority]);
388                 ctdb->freeze_handles[priority] = NULL;
389         }
390 }
391
392 /*
393   thaw the databases
394  */
395 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority)
396 {
397
398         if (priority > NUM_DB_PRIORITIES) {
399                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
400                 return -1;
401         }
402
403         if (priority == 0) {
404                 int i;
405                 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
406                         thaw_priority(ctdb, i);
407                 }
408         } else {
409                 thaw_priority(ctdb, priority);
410         }
411
412         ctdb_call_resend_all(ctdb);
413         return 0;
414 }
415
416
417 /*
418   start a transaction on all databases - used for recovery
419  */
420 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
421 {
422         struct ctdb_db_context *ctdb_db;
423         int i;
424
425         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
426                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
427                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
428                         return -1;
429                 }
430         }
431
432         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
433                 int ret;
434
435                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
436
437                 if (ctdb->freeze_transaction_started) {
438                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
439                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
440                                          ctdb_db->db_name));
441                                 /* not a fatal error */
442                         }
443                 }
444
445                 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
446
447                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
448
449                 if (ret != 0) {
450                         DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
451                                  ctdb_db->db_name));
452                         return -1;
453                 }
454         }
455
456         ctdb->freeze_transaction_started = true;
457         ctdb->freeze_transaction_id = id;
458
459         return 0;
460 }
461
462 /*
463   cancel a transaction for all databases - used for recovery
464  */
465 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
466 {
467         struct ctdb_db_context *ctdb_db;
468
469         DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
470
471         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
472                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
473
474                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
475                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",  ctdb_db->db_name));
476                         /* not a fatal error */
477                 }
478
479                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
480         }
481
482         ctdb->freeze_transaction_started = false;
483
484         return 0;
485 }
486
487 /*
488   commit transactions on all databases
489  */
490 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
491 {
492         struct ctdb_db_context *ctdb_db;
493         int i;
494         int healthy_nodes = 0;
495
496         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
497                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
498                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
499                         return -1;
500                 }
501         }
502
503         if (!ctdb->freeze_transaction_started) {
504                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
505                 return -1;
506         }
507
508         if (id != ctdb->freeze_transaction_id) {
509                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
510                 return -1;
511         }
512
513         DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
514         for (i=0; i < ctdb->num_nodes; i++) {
515                 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
516                                    i, ctdb->nodes[i]->flags));
517                 if (ctdb->nodes[i]->flags == 0) {
518                         healthy_nodes++;
519                 }
520         }
521         DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
522
523         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
524                 int ret;
525
526                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
527                 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
528                 if (ret != 0) {
529                         DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
530                                  ctdb_db->db_name));
531                         goto fail;
532                 }
533                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
534
535                 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
536                 if (ret != 0) {
537                         DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
538                                          "Cancel all remaining transactions and resetting transaction_started to false.\n",
539                                          ctdb_db->db_name));
540                         goto fail;
541                 }
542         }
543
544         ctdb->freeze_transaction_started = false;
545         ctdb->freeze_transaction_id = 0;
546
547         return 0;
548
549 fail:
550         /* cancel any pending transactions */
551         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
552                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
553                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
554                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
555                                  ctdb_db->db_name));
556                 }
557                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
558         }
559         ctdb->freeze_transaction_started = false;
560
561         return -1;
562 }
563
564 /*
565   wipe a database - only possible when in a frozen transaction
566  */
567 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
568 {
569         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
570         struct ctdb_db_context *ctdb_db;
571
572         ctdb_db = find_ctdb_db(ctdb, w.db_id);
573         if (!ctdb_db) {
574                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
575                 return -1;
576         }
577
578         if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
579                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
580                 return -1;
581         }
582
583         if (!ctdb->freeze_transaction_started) {
584                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
585                 return -1;
586         }
587
588         if (w.transaction_id != ctdb->freeze_transaction_id) {
589                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
590                 return -1;
591         }
592
593         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
594                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
595                          ctdb_db->db_name));
596                 return -1;
597         }
598
599         return 0;
600 }