idtree: fix overflow for v. large ids on allocation and removal
[sahlberg/ctdb.git] / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "lib/tevent/tevent.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
27 #include "db_wrap.h"
28
/*
  tell whether a database belongs to the group that is locked in the
  second pass of ctdb_lock_all_databases(), i.e. whether its name
  contains "notify" or "serverid"
 */
static bool later_db(const char *name)
{
        if (strstr(name, "notify") != NULL) {
                return true;
        }
        return strstr(name, "serverid") != NULL;
}
33
34 /*
35   lock all databases
36  */
37 static int ctdb_lock_all_databases(struct ctdb_context *ctdb, uint32_t priority)
38 {
39         struct ctdb_db_context *ctdb_db;
40         /* REMOVE later */
41         /* This double loop is for backward compatibility and deadlock
42            avoidance for old samba versions that not yet support
43            the set prio call.
44            This code shall be removed later
45         */
46         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
47                 if (ctdb_db->priority != priority) {
48                         continue;
49                 }
50                 if (later_db(ctdb_db->db_name)) {
51                         continue;
52                 }
53                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
54                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
55                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
56                         return -1;
57                 }
58         }
59         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
60                 if (ctdb_db->priority != priority) {
61                         continue;
62                 }
63                 if (!later_db(ctdb_db->db_name)) {
64                         continue;
65                 }
66                 DEBUG(DEBUG_INFO,("locking database 0x%08x priority:%u %s\n", ctdb_db->db_id, ctdb_db->priority, ctdb_db->db_name));
67                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
68                         DEBUG(DEBUG_ERR,(__location__ " Failed to lock database %s\n", ctdb_db->db_name));
69                         return -1;
70                 }
71         }
72         return 0;
73 }
74
/*
  one control request waiting for a freeze lock child to get the
  database locks; waiters are chained on the "waiters" list of a
  freeze handle
 */
struct ctdb_freeze_waiter {
        /* doubly linked list pointers */
        struct ctdb_freeze_waiter *next;
        struct ctdb_freeze_waiter *prev;

        /* daemon context used when the reply is sent */
        struct ctdb_context *ctdb;

        /* the control request that gets answered when the waiter is freed */
        struct ctdb_req_control *c;

        /* database priority this waiter is queued for */
        uint32_t priority;

        /* status value sent back in the control reply */
        int32_t status;
};
86
/* a handle to a freeze lock child process */
struct ctdb_freeze_handle {
        /* daemon context owning this handle */
        struct ctdb_context *ctdb;

        /* database priority the child locks */
        uint32_t priority;

        /* pid of the forked lock child */
        pid_t child;

        /* read end of the pipe on which the child reports its status */
        int fd;

        /* control requests waiting for the locks to be taken */
        struct ctdb_freeze_waiter *waiters;
};
95
96 /*
97   destroy a freeze handle
98  */     
99 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
100 {
101         struct ctdb_context *ctdb = h->ctdb;
102         struct ctdb_db_context *ctdb_db;
103
104         DEBUG(DEBUG_ERR,("Release freeze handler for prio %u\n", h->priority));
105
106         /* cancel any pending transactions */
107         if (ctdb->freeze_transaction_started) {
108                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
109                         if (ctdb_db->priority != h->priority) {
110                                 continue;
111                         }
112                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
113                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
114                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
115                                          ctdb_db->db_name));
116                         }
117                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
118                 }
119                 ctdb->freeze_transaction_started = false;
120         }
121
122         ctdb->freeze_mode[h->priority]    = CTDB_FREEZE_NONE;
123         ctdb->freeze_handles[h->priority] = NULL;
124
125         kill(h->child, SIGKILL);
126         return 0;
127 }
128
129 /*
130   called when the child writes its status to us
131  */
132 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde, 
133                                        uint16_t flags, void *private_data)
134 {
135         struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
136         int32_t status;
137         struct ctdb_freeze_waiter *w;
138
139         if (h->ctdb->freeze_mode[h->priority] == CTDB_FREEZE_FROZEN) {
140                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
141                 talloc_free(h);
142                 return;
143         }
144
145         if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
146                 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
147                 status = -1;
148         }
149
150         if (status == -1) {
151                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
152                 /* we didn't get the locks - destroy the handle */
153                 talloc_free(h);
154                 return;
155         }
156
157         h->ctdb->freeze_mode[h->priority] = CTDB_FREEZE_FROZEN;
158
159         /* notify the waiters */
160         if (h != h->ctdb->freeze_handles[h->priority]) {
161                 DEBUG(DEBUG_ERR,("lockwait finished but h is not linked\n"));
162         }
163         while ((w = h->waiters)) {
164                 w->status = status;
165                 DLIST_REMOVE(h->waiters, w);
166                 talloc_free(w);
167         }
168 }
169
170 /*
171   create a child which gets locks on all the open databases, then calls the callback telling the parent
172   that it is done
173  */
174 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, uint32_t priority)
175 {
176         struct ctdb_freeze_handle *h;
177         int fd[2];
178         struct fd_event *fde;
179
180         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
181         CTDB_NO_MEMORY_NULL(ctdb, h);
182
183         h->ctdb     = ctdb;
184         h->priority = priority;
185
186         if (pipe(fd) == -1) {
187                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
188                 talloc_free(h);
189                 return NULL;
190         }
191         
192         h->child = fork();
193         if (h->child == -1) {
194                 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
195                 talloc_free(h);
196                 return NULL;
197         }
198
199         if (h->child == 0) {
200                 int ret;
201
202                 /* in the child */
203                 close(fd[0]);
204
205                 debug_extra = talloc_asprintf(NULL, "freeze_lock-%u:", priority);
206                 ret = ctdb_lock_all_databases(ctdb, priority);
207                 if (ret != 0) {
208                         _exit(0);
209                 }
210
211                 ret = write(fd[1], &ret, sizeof(ret));
212                 if (ret != sizeof(ret)) {
213                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
214                         _exit(1);
215                 }
216
217                 while (1) {
218                         sleep(1);
219                         if (kill(ctdb->ctdbd_pid, 0) != 0) {
220                                 DEBUG(DEBUG_ERR,("Parent died. Exiting lock wait child\n"));
221
222                                 _exit(0);
223                         }
224                 }
225         }
226
227         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
228
229         close(fd[1]);
230         set_close_on_exec(fd[0]);
231
232         h->fd = fd[0];
233
234
235         fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ,
236                            ctdb_freeze_lock_handler, h);
237         if (fde == NULL) {
238                 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
239                 close(fd[0]);
240                 talloc_free(h);
241                 return NULL;
242         }
243         tevent_fd_set_auto_close(fde);
244
245         return h;
246 }
247
248 /*
249   destroy a waiter for a freeze mode change
250  */
251 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
252 {
253         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
254         return 0;
255 }
256
257 /*
258   start the freeze process for a certain priority
259  */
260 int ctdb_start_freeze(struct ctdb_context *ctdb, uint32_t priority)
261 {
262         if (priority == 0) {
263                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
264                 priority = 1;
265         }
266
267         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
268                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
269                 return -1;
270         }
271
272         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
273                 /* we're already frozen */
274                 return 0;
275         }
276
277         /* Stop any vacuuming going on: we don't want to wait. */
278         ctdb_stop_vacuuming(ctdb);
279
280         /* if there isn't a freeze lock child then create one */
281         if (ctdb->freeze_handles[priority] == NULL) {
282                 ctdb->freeze_handles[priority] = ctdb_freeze_lock(ctdb, priority);
283                 CTDB_NO_MEMORY(ctdb, ctdb->freeze_handles[priority]);
284                 ctdb->freeze_mode[priority] = CTDB_FREEZE_PENDING;
285         }
286
287         return 0;
288 }
289
290 /*
291   freeze the databases
292  */
293 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
294 {
295         struct ctdb_freeze_waiter *w;
296         uint32_t priority;
297
298         priority = (uint32_t)c->srvid;
299
300         DEBUG(DEBUG_ERR, ("Freeze priority %u\n", priority));
301
302         if (priority == 0) {
303                 DEBUG(DEBUG_ERR,("Freeze priority 0 requested, remapping to priority 1\n"));
304                 priority = 1;
305         }
306
307         if ((priority < 1) || (priority > NUM_DB_PRIORITIES)) {
308                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
309                 return -1;
310         }
311
312         if (ctdb->freeze_mode[priority] == CTDB_FREEZE_FROZEN) {
313                 /* we're already frozen */
314                 return 0;
315         }
316
317         if (ctdb_start_freeze(ctdb, priority) != 0) {
318                 DEBUG(DEBUG_ERR,(__location__ " Failed to start freezing databases with priority %u\n", priority));
319                 return -1;
320         }
321
322         /* add ourselves to list of waiters */
323         if (ctdb->freeze_handles[priority] == NULL) {
324                 DEBUG(DEBUG_ERR,("No freeze lock handle when adding a waiter\n"));
325                 return -1;
326         }
327
328         w = talloc(ctdb->freeze_handles[priority], struct ctdb_freeze_waiter);
329         CTDB_NO_MEMORY(ctdb, w);
330         w->ctdb     = ctdb;
331         w->c        = talloc_steal(w, c);
332         w->priority = priority;
333         w->status   = -1;
334         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
335         DLIST_ADD(ctdb->freeze_handles[priority]->waiters, w);
336
337         /* we won't reply till later */
338         *async_reply = True;
339         return 0;
340 }
341
342
343 /*
344   block until we are frozen, used during daemon startup
345  */
346 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
347 {
348         int i;
349
350         for (i=1; i<=NUM_DB_PRIORITIES; i++) {
351                 if (ctdb_start_freeze(ctdb, i)) {
352                         DEBUG(DEBUG_ERR,(__location__ " Failed to freeze databases of prio %u\n", i));
353                         continue;
354                 }
355
356                 /* block until frozen */
357                 while (ctdb->freeze_mode[i] == CTDB_FREEZE_PENDING) {
358                         event_loop_once(ctdb->ev);
359                 }
360         }
361
362         return 0;
363 }
364
365
366 static void thaw_priority(struct ctdb_context *ctdb, uint32_t priority)
367 {
368         DEBUG(DEBUG_ERR,("Thawing priority %u\n", priority));
369
370         /* cancel any pending transactions */
371         if (ctdb->freeze_transaction_started) {
372                 struct ctdb_db_context *ctdb_db;
373
374                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
375                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
376                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
377                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
378                                          ctdb_db->db_name));
379                         }
380                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
381                 }
382         }
383         ctdb->freeze_transaction_started = false;
384
385 #if 0
386         /* this hack can be used to get a copy of the databases at the end of a recovery */
387         system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
388 #endif
389
390 #if 0
391         /* and this one for local testing */
392         system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
393 #endif
394
395         if (ctdb->freeze_handles[priority] != NULL) {
396                 talloc_free(ctdb->freeze_handles[priority]);
397                 ctdb->freeze_handles[priority] = NULL;
398         }
399 }
400
401 /*
402   thaw the databases
403  */
404 int32_t ctdb_control_thaw(struct ctdb_context *ctdb, uint32_t priority)
405 {
406
407         if (priority > NUM_DB_PRIORITIES) {
408                 DEBUG(DEBUG_ERR,(__location__ " Invalid db priority : %u\n", priority));
409                 return -1;
410         }
411
412         if (priority == 0) {
413                 int i;
414                 for (i=1;i<=NUM_DB_PRIORITIES; i++) {
415                         thaw_priority(ctdb, i);
416                 }
417         } else {
418                 thaw_priority(ctdb, priority);
419         }
420
421         ctdb_call_resend_all(ctdb);
422         return 0;
423 }
424
425
426 /*
427   start a transaction on all databases - used for recovery
428  */
429 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
430 {
431         struct ctdb_db_context *ctdb_db;
432         int i;
433
434         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
435                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
436                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
437                         return -1;
438                 }
439         }
440
441         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
442                 int ret;
443
444                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
445
446                 if (ctdb->freeze_transaction_started) {
447                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
448                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
449                                          ctdb_db->db_name));
450                                 /* not a fatal error */
451                         }
452                 }
453
454                 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
455
456                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
457
458                 if (ret != 0) {
459                         DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
460                                  ctdb_db->db_name));
461                         return -1;
462                 }
463         }
464
465         ctdb->freeze_transaction_started = true;
466         ctdb->freeze_transaction_id = id;
467
468         return 0;
469 }
470
471 /*
472   cancel a transaction for all databases - used for recovery
473  */
474 int32_t ctdb_control_transaction_cancel(struct ctdb_context *ctdb)
475 {
476         struct ctdb_db_context *ctdb_db;
477
478         DEBUG(DEBUG_ERR,(__location__ " recovery transaction cancelled called\n"));
479
480         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
481                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
482
483                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
484                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",  ctdb_db->db_name));
485                         /* not a fatal error */
486                 }
487
488                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
489         }
490
491         ctdb->freeze_transaction_started = false;
492
493         return 0;
494 }
495
496 /*
497   commit transactions on all databases
498  */
499 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
500 {
501         struct ctdb_db_context *ctdb_db;
502         int i;
503         int healthy_nodes = 0;
504
505         for (i=1;i<=NUM_DB_PRIORITIES; i++) {
506                 if (ctdb->freeze_mode[i] != CTDB_FREEZE_FROZEN) {
507                         DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
508                         return -1;
509                 }
510         }
511
512         if (!ctdb->freeze_transaction_started) {
513                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
514                 return -1;
515         }
516
517         if (id != ctdb->freeze_transaction_id) {
518                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
519                 return -1;
520         }
521
522         DEBUG(DEBUG_DEBUG,(__location__ " num_nodes[%d]\n", ctdb->num_nodes));
523         for (i=0; i < ctdb->num_nodes; i++) {
524                 DEBUG(DEBUG_DEBUG,(__location__ " node[%d].flags[0x%X]\n",
525                                    i, ctdb->nodes[i]->flags));
526                 if (ctdb->nodes[i]->flags == 0) {
527                         healthy_nodes++;
528                 }
529         }
530         DEBUG(DEBUG_INFO,(__location__ " healthy_nodes[%d]\n", healthy_nodes));
531
532         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
533                 int ret;
534
535                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
536                 ret = tdb_transaction_commit(ctdb_db->ltdb->tdb);
537                 if (ret != 0) {
538                         DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
539                                  ctdb_db->db_name));
540                         goto fail;
541                 }
542                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
543
544                 ret = ctdb_update_persistent_health(ctdb, ctdb_db, NULL, healthy_nodes);
545                 if (ret != 0) {
546                         DEBUG(DEBUG_CRIT,(__location__ " Failed to update persistent health for db '%s'. "
547                                          "Cancel all remaining transactions and resetting transaction_started to false.\n",
548                                          ctdb_db->db_name));
549                         goto fail;
550                 }
551         }
552
553         ctdb->freeze_transaction_started = false;
554         ctdb->freeze_transaction_id = 0;
555
556         return 0;
557
558 fail:
559         /* cancel any pending transactions */
560         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
561                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
562                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
563                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
564                                  ctdb_db->db_name));
565                 }
566                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
567         }
568         ctdb->freeze_transaction_started = false;
569
570         return -1;
571 }
572
573 /*
574   wipe a database - only possible when in a frozen transaction
575  */
576 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
577 {
578         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
579         struct ctdb_db_context *ctdb_db;
580
581         ctdb_db = find_ctdb_db(ctdb, w.db_id);
582         if (!ctdb_db) {
583                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
584                 return -1;
585         }
586
587         if (ctdb->freeze_mode[ctdb_db->priority] != CTDB_FREEZE_FROZEN) {
588                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
589                 return -1;
590         }
591
592         if (!ctdb->freeze_transaction_started) {
593                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
594                 return -1;
595         }
596
597         if (w.transaction_id != ctdb->freeze_transaction_id) {
598                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
599                 return -1;
600         }
601
602         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
603                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
604                          ctdb_db->db_name));
605                 return -1;
606         }
607
608         return 0;
609 }