rename ctdb.h to ctdb_protocol.h
[sahlberg/ctdb.git] / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "lib/events/events.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "include/ctdb_protocol.h"
26 #include "include/ctdb_private.h"
27 #include "lib/util/dlinklist.h"
28 #include "db_wrap.h"
29
30
31 /*
32   lock all databases
33  */
34 static int ctdb_lock_all_databases(struct ctdb_context *ctdb, uint32_t priority)
35 {
36         struct ctdb_db_context *ctdb_db;
37         /* REMOVE later */
38         /* This double loop is for backward compatibility and deadlock
39            avoidance for old samba versions that not yet support
40            the set prio call.
41            This code shall be removed later
42         */
43         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
44                 if (ctdb_db->priority != priority) {
45                         continue;
46                 }
47                 if (strstr(ctdb_db->db_name, "notify") != NULL) {
48                         continue;
49                 }
50                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
51                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
52                         return -1;
53                 }
54         }
55         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
56                 if (ctdb_db->priority != priority) {
57                         continue;
58                 }
59                 if (strstr(ctdb_db->db_name, "notify") == NULL) {
60                         continue;
61                 }
62                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
63                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
64                         return -1;
65                 }
66         }
67         return 0;
68 }
69
/*
  one control request waiting for a freeze lock child to get the
  database locks; waiters are chained on the 'waiters' list of a
  ctdb_freeze_handle
 */
struct ctdb_freeze_waiter {
        struct ctdb_freeze_waiter *next;  /* list linkage */
        struct ctdb_freeze_waiter *prev;  /* list linkage */
        struct ctdb_context *ctdb;
        struct ctdb_req_control *c;       /* the control request to reply to */
        uint32_t priority;                /* database priority being frozen */
        int32_t status;                   /* status sent back in the reply */
};
81
/*
  a handle to a freeze lock child process; one handle is kept per
  database priority in ctdb->freeze_handles[]
 */
struct ctdb_freeze_handle {
        struct ctdb_context *ctdb;
        uint32_t priority;                   /* index into freeze_handles[] and freeze_mode[] */
        pid_t child;                         /* pid returned by fork() for the lock child */
        int fd;                              /* read end of the pipe the child reports on */
        struct ctdb_freeze_waiter *waiters;  /* controls waiting for the freeze to finish */
};
90
91 /*
92   destroy a freeze handle
93  */     
94 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
95 {
96         struct ctdb_context *ctdb = h->ctdb;
97         struct ctdb_db_context *ctdb_db;
98
99         DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
100
101         /* cancel any pending transactions */
102         if (ctdb->freeze_transaction_started) {
103                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
104                         if (ctdb_db->priority != h->priority) {
105                                 continue;
106                         }
107                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
108                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
109                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
110                                          ctdb_db->db_name));
111                         }
112                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
113                 }
114                 ctdb->freeze_transaction_started = false;
115         }
116
117         ctdb->freeze_mode[h->priority]    = CTDB_FREEZE_NONE;
118         ctdb->freeze_handles[h->priority] = NULL;
119
120         kill(h->child, SIGKILL);
121         return 0;
122 }
123
124 /*
125   called when the child writes its status to us
126  */
127 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde, 
128                                        uint16_t flags, void *private_data)
129 {
130         struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
131         int32_t status;
132         struct ctdb_freeze_waiter *w;
133
134         if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
135                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
136                 talloc_free(h);
137                 return;
138         }
139
140         if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
141                 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
142                 status = -1;
143         }
144
145         if (status == -1) {
146                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
147                 /* we didn't get the locks - destroy the handle */
148                 talloc_free(h);
149                 return;
150         }
151
152         h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
153
154         /* notify the waiters */
155         if (h != h->ctdb->freeze_handles[h->priority]) {
156                 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
157         }
158         while ((w = h->waiters)) {
159                 w->status = status;
160                 DLIST_REMOVE(h->waiters, w);
161                 talloc_free(w);
162         }
163 }
164
165 /*
166   create a child which gets locks on all the open databases, then calls the callback telling the parent
167   that it is done
168  */
169 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, uint32_t priority)
170 {
171         struct ctdb_freeze_handle *h;
172         int fd[2];
173         struct fd_event *fde;
174
175         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
176         CTDB_NO_MEMORY_NULL(ctdb, h);
177
178         h->ctdb     = ctdb;
179         h->priority = priority;
180
181         if (pipe(fd) == -1) {
182                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
183                 talloc_free(h);
184                 return NULL;
185         }
186         
187         h->child = fork();
188         if (h->child == -1) {
189                 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
190                 talloc_free(h);
191                 return NULL;
192         }
193
194         if (h->child == 0) {
195                 int ret;
196
197                 /* in the child */
198                 close(fd[0]);
199
200                 ret = ctdb_lock_all_databases(ctdb, priority);
201                 if (ret != 0) {
202                         _exit(0);
203                 }
204
205                 ret = write(fd[1], &ret, sizeof(ret));
206                 if (ret != sizeof(ret)) {
207                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
208                         _exit(1);
209                 }
210
211                 while (1) {
212                         sleep(1);
213                         if (kill(ctdb->ctdbd_pid, 0) != 0) {
214                                 DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
215
216                                 _exit(0);
217                         }
218                 }
219         }
220
221         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
222
223         close(fd[1]);
224         set_close_on_exec(fd[0]);
225
226         h->fd = fd[0];
227
228
229         fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE, 
230                            ctdb_freeze_lock_handler, h);
231         if (fde == NULL) {
232                 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
233                 close(fd[0]);
234                 talloc_free(h);
235                 return NULL;
236         }
237
238         return h;
239 }
240
241 /*
242   destroy a waiter for a freeze mode change
243  */
244 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
245 {
246         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
247         return 0;
248 }
249
250 /*
251   start the freeze process for a certain priority
252  */
253 int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
254 {
255         if (priority == 0) {
256                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
257                 priority = 1;
258         }
259
260         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
261                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
262                 return -1;
263         }
264
265         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
266                 /* we're already frozen */
267                 return 0;
268         }
269
270         /* if there isn't a freeze lock child then create one */
271         if (ctdb->freeze_handles[priority] == NULL) {
272                 ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
273                 CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
274                 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
275         }
276
277         return 0;
278 }
279
280 /*
281   freeze the databases
282  */
283 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
284 {
285         struct ctdb_freeze_waiter *w;
286         uint32_t priority;
287
288         priority = (uint32_t)c->srvid;
289
290         DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
291
292         if (priority == 0) {
293                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
294                 priority = 1;
295         }
296
297         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
298                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
299                 return -1;
300         }
301
302         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
303                 /* we're already frozen */
304                 return 0;
305         }
306
307         if (ctdb_start_freeze(ctdb, priority) != 0) {
308                 DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
309                 return -1;
310         }
311
312         /* add ourselves to list of waiters */
313         if (ctdb->freeze_handles[priority] == NULL) {
314                 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
315                 return -1;
316         }
317
318         w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
319         CTDB_NO_MEMORY(ctdb, w);
320         w->ctdb     = ctdb;
321         w->c        = talloc_steal(w, c);
322         w->priority = priority;
323         w->status   = -1;
324         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
325         DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
326
327         /* we won't reply till later */
328         *async_reply = True;
329         return 0;
330 }
331
332
333 /*
334   block until we are frozen, used during daemon startup
335  */
336 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
337 {
338         int i;
339
340         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
341                 if (ctdb_start_freeze(ctdb, i)) {
342                         DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
343                         continue;
344                 }
345
346                 /* block until frozen */
347                 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
348                         event_loop_once(ctdb->ev);
349                 }
350         }
351
352         return 0;
353 }
354
355
356 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
357 {
358         DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
359
360         /* cancel any pending transactions */
361         if (ctdb->freeze_transaction_started) {
362                 struct ctdb_db_context *ctdb_db;
363
364                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
365                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
366                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
367                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
368                                          ctdb_db->db_name));
369                         }
370                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
371                 }
372         }
373         ctdb->freeze_transaction_started = false;
374
375 #if 0
376         /* this hack can be used to get a copy of the databases at the end of a recovery */
377         system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
378 #endif
379
380 #if 0
381         /* and this one for local testing */
382         system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
383 #endif
384
385         if (ctdb->freeze_handles[priority] != NULL) {
386                 talloc_free(ctdb->freeze_handles[priority]);
387                 ctdb->freeze_handles[priority] = NULL;
388         }
389 }
390
391 /*
392   thaw the databases
393  */
394 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority)
395 {
396
397         if (priority > NUM_DB_PRIORITIES) {
398                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
399                 return -1;
400         }
401
402         if (priority == 0) {
403                 int i;
404                 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
405                         thaw_priority(ctdb, i);
406                 }
407         } else {
408                 thaw_priority(ctdb, priority);
409         }
410
411         ctdb_call_resend_all(ctdb);
412         return 0;
413 }
414
415
416 /*
417   start a transaction on all databases - used for recovery
418  */
419 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
420 {
421         struct ctdb_db_context *ctdb_db;
422         int i;
423
424         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
425                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
426                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
427                         return -1;
428                 }
429         }
430
431         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
432                 int ret;
433
434                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
435
436                 if (ctdb->freeze_transaction_started) {
437                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
438                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
439                                          ctdb_db->db_name));
440                                 /* not a fatal error */
441                         }
442                 }
443
444                 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
445
446                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
447
448                 if (ret != 0) {
449                         DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
450                                  ctdb_db->db_name));
451                         return -1;
452                 }
453         }
454
455         ctdb->freeze_transaction_started = true;
456         ctdb->freeze_transaction_id = id;
457
458         return 0;
459 }
460
461 /*
462   cancel a transaction for all databases - used for recovery
463  */
464 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
465 {
466         struct ctdb_db_context *ctdb_db;
467
468         DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
469
470         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
471                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
472
473                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
474                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",  ctdb_db->db_name));
475                         /* not a fatal error */
476                 }
477
478                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
479         }
480
481         ctdb->freeze_transaction_started = false;
482
483         return 0;
484 }
485
486 /*
487   commit transactions on all databases
488  */
489 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
490 {
491         struct ctdb_db_context *ctdb_db;
492         int i;
493         int healthy_nodes = 0;
494
495         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
496                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
497                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
498                         return -1;
499                 }
500         }
501
502         if (!ctdb->freeze_transaction_started) {
503                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
504                 return -1;
505         }
506
507         if (id != ctdb->freeze_transaction_id) {
508                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
509                 return -1;
510         }
511
512         DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
513         for (i=0; i < ctdb->num_nodes; i++) {
514                 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
515                                    i, ctdb->nodes[i]->flags));
516                 if (ctdb->nodes[i]->flags == 0) {
517                         healthy_nodes++;
518                 }
519         }
520         DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
521
522         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
523                 int ret;
524
525                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
526                 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
527                 if (ret != 0) {
528                         DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
529                                  ctdb_db->db_name));
530                         goto fail;
531                 }
532                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
533
534                 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
535                 if (ret != 0) {
536                         DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
537                                          "Cancel all remaining transactions and resetting transaction_started to false.\n",
538                                          ctdb_db->db_name));
539                         goto fail;
540                 }
541         }
542
543         ctdb->freeze_transaction_started = false;
544         ctdb->freeze_transaction_id = 0;
545
546         return 0;
547
548 fail:
549         /* cancel any pending transactions */
550         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
551                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
552                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
553                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
554                                  ctdb_db->db_name));
555                 }
556                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
557         }
558         ctdb->freeze_transaction_started = false;
559
560         return -1;
561 }
562
563 /*
564   wipe a database - only possible when in a frozen transaction
565  */
566 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
567 {
568         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
569         struct ctdb_db_context *ctdb_db;
570
571         ctdb_db = find_ctdb_db(ctdb, w.db_id);
572         if (!ctdb_db) {
573                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
574                 return -1;
575         }
576
577         if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
578                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
579                 return -1;
580         }
581
582         if (!ctdb->freeze_transaction_started) {
583                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
584                 return -1;
585         }
586
587         if (w.transaction_id != ctdb->freeze_transaction_id) {
588                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
589                 return -1;
590         }
591
592         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
593                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
594                          ctdb_db->db_name));
595                 return -1;
596         }
597
598         return 0;
599 }