set the TDB_NO_NESTING flag for the tdb before we start a transaction from within...
[sahlberg/ctdb.git] / server / ctdb_freeze.c
1 /* 
2    ctdb freeze handling
3
4    Copyright (C) Andrew Tridgell  2007
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "includes.h"
20 #include "lib/events/events.h"
21 #include "lib/tdb/include/tdb.h"
22 #include "system/network.h"
23 #include "system/filesys.h"
24 #include "system/wait.h"
25 #include "../include/ctdb_private.h"
26 #include "lib/util/dlinklist.h"
27 #include "db_wrap.h"
28
29
30 /*
31   lock all databases
32  */
33 static int ctdb_lock_all_databases(struct ctdb_context *ctdb)
34 {
35         struct ctdb_db_context *ctdb_db;
36         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
37                 if (tdb_lockall(ctdb_db->ltdb->tdb) != 0) {
38                         return -1;
39                 }
40         }
41         return 0;
42 }
43
/*
  One freeze control request that is parked until the freeze lock child
  reports whether it obtained the database locks.  Waiters are kept on
  the doubly linked list headed by ctdb_freeze_handle.waiters.
 */
struct ctdb_freeze_waiter {
	/* list linkage, managed with the DLIST_* macros */
	struct ctdb_freeze_waiter *next;
	struct ctdb_freeze_waiter *prev;

	/* daemon context the request belongs to */
	struct ctdb_context *ctdb;

	/* the control request that will be answered when the waiter is freed */
	struct ctdb_req_control *c;

	/* status passed back in the control reply */
	int32_t status;
};
54
/*
  State kept by the parent for one freeze lock child process.
 */
struct ctdb_freeze_handle {
	/* daemon context that owns this handle */
	struct ctdb_context *ctdb;

	/* pid of the forked child that takes the database locks */
	pid_t child;

	/* parent's end of the socketpair shared with the child */
	int fd;

	/* freeze control requests waiting for the child to report back */
	struct ctdb_freeze_waiter *waiters;

	/* true while a transaction is open on the databases */
	bool transaction_started;

	/* id supplied when the current transaction was started */
	uint32_t transaction_id;
};
64
65 /*
66   destroy a freeze handle
67  */     
68 static int ctdb_freeze_handle_destructor(struct ctdb_freeze_handle *h)
69 {
70         struct ctdb_context *ctdb = h->ctdb;
71         struct ctdb_db_context *ctdb_db;
72
73         /* cancel any pending transactions */
74         if (ctdb->freeze_handle && ctdb->freeze_handle->transaction_started) {
75                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
76                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
77                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
78                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
79                                          ctdb_db->db_name));
80                         }
81                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
82                 }
83                 ctdb->freeze_handle->transaction_started = false;
84         }
85
86         ctdb->freeze_mode = CTDB_FREEZE_NONE;
87         ctdb->freeze_handle = NULL;
88
89         kill(h->child, SIGKILL);
90         return 0;
91 }
92
93 /*
94   called when the child writes its status to us
95  */
96 static void ctdb_freeze_lock_handler(struct event_context *ev, struct fd_event *fde, 
97                                        uint16_t flags, void *private_data)
98 {
99         struct ctdb_freeze_handle *h = talloc_get_type(private_data, struct ctdb_freeze_handle);
100         int32_t status;
101         struct ctdb_freeze_waiter *w;
102
103         if (h->ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
104                 DEBUG(DEBUG_INFO,("freeze child died - unfreezing\n"));
105                 if (h->ctdb->freeze_handle == h) {
106                         h->ctdb->freeze_handle = NULL;
107                 }
108                 talloc_free(h);
109                 return;
110         }
111
112         if (read(h->fd, &status, sizeof(status)) != sizeof(status)) {
113                 DEBUG(DEBUG_ERR,("read error from freeze lock child\n"));
114                 status = -1;
115         }
116
117         if (status == -1) {
118                 DEBUG(DEBUG_ERR,("Failed to get locks in ctdb_freeze_child\n"));
119                 /* we didn't get the locks - destroy the handle */
120                 talloc_free(h);
121                 return;
122         }
123
124         h->ctdb->freeze_mode = CTDB_FREEZE_FROZEN;
125
126         /* notify the waiters */
127         while ((w = h->ctdb->freeze_handle->waiters)) {
128                 w->status = status;
129                 DLIST_REMOVE(h->ctdb->freeze_handle->waiters, w);
130                 talloc_free(w);
131         }
132 }
133
134 /*
135   create a child which gets locks on all the open databases, then calls the callback telling the parent
136   that it is done
137  */
138 static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb)
139 {
140         struct ctdb_freeze_handle *h;
141         int fd[2];
142         struct fd_event *fde;
143
144         h = talloc_zero(ctdb, struct ctdb_freeze_handle);
145         CTDB_NO_MEMORY_NULL(ctdb, h);
146
147         h->ctdb = ctdb;
148
149         /* use socketpair() instead of pipe() so we have bi-directional fds */
150         if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) != 0) {
151                 DEBUG(DEBUG_ERR,("Failed to create pipe for ctdb_freeze_lock\n"));
152                 talloc_free(h);
153                 return NULL;
154         }
155         
156         h->child = fork();
157         if (h->child == -1) {
158                 DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
159                 talloc_free(h);
160                 return NULL;
161         }
162
163         if (h->child == 0) {
164                 int ret;
165                 int count = 0;
166                 /* in the child */
167                 close(fd[0]);
168                 ret = ctdb_lock_all_databases(ctdb);
169                 if (ret != 0) {
170                         _exit(0);
171                 }
172
173                 alarm(30);
174
175                 while (count++ < 30) {
176                         ret = write(fd[1], &ret, sizeof(ret));
177                         if (ret == sizeof(ret)) {
178                                 break;
179                         }
180                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. ret:%d errno:%u\n", ret, errno));
181                         sleep (1);
182                 }
183                 if (count >= 30) {
184                         DEBUG(DEBUG_ERR, (__location__ " Failed to write to socket from freeze child. Aborting freeze child\n"));
185                         _exit(0);
186                 }
187
188                 /* the read here means we will die if the parent exits */
189                 read(fd[1], &ret, sizeof(ret));
190                 _exit(0);
191         }
192
193         talloc_set_destructor(h, ctdb_freeze_handle_destructor);
194
195         close(fd[1]);
196
197         h->fd = fd[0];
198
199         fde = event_add_fd(ctdb->ev, h, h->fd, EVENT_FD_READ|EVENT_FD_AUTOCLOSE, 
200                            ctdb_freeze_lock_handler, h);
201         if (fde == NULL) {
202                 DEBUG(DEBUG_ERR,("Failed to setup fd event for ctdb_freeze_lock\n"));
203                 close(fd[0]);
204                 talloc_free(h);
205                 return NULL;
206         }
207
208         return h;
209 }
210
211 /*
212   destroy a waiter for a freeze mode change
213  */
214 static int ctdb_freeze_waiter_destructor(struct ctdb_freeze_waiter *w)
215 {
216         DLIST_REMOVE(w->ctdb->freeze_handle->waiters, w);
217         ctdb_request_control_reply(w->ctdb, w->c, NULL, w->status, NULL);
218         return 0;
219 }
220
221 /*
222   start the freeze process
223  */
224 void ctdb_start_freeze(struct ctdb_context *ctdb)
225 {
226         if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
227                 /* we're already frozen */
228                 return;
229         }
230
231         /* if there isn't a freeze lock child then create one */
232         if (!ctdb->freeze_handle) {
233                 ctdb->freeze_handle = ctdb_freeze_lock(ctdb);
234                 CTDB_NO_MEMORY_VOID(ctdb, ctdb->freeze_handle);
235                 ctdb->freeze_mode = CTDB_FREEZE_PENDING;
236         }
237 }
238
239 /*
240   freeze the databases
241  */
242 int32_t ctdb_control_freeze(struct ctdb_context *ctdb, struct ctdb_req_control *c, bool *async_reply)
243 {
244         struct ctdb_freeze_waiter *w;
245
246         if (ctdb->freeze_mode == CTDB_FREEZE_FROZEN) {
247                 /* we're already frozen */
248                 return 0;
249         }
250
251         ctdb_start_freeze(ctdb);
252
253         /* add ourselves to list of waiters */
254         w = talloc(ctdb->freeze_handle, struct ctdb_freeze_waiter);
255         CTDB_NO_MEMORY(ctdb, w);
256         w->ctdb   = ctdb;
257         w->c      = talloc_steal(w, c);
258         w->status = -1;
259         talloc_set_destructor(w, ctdb_freeze_waiter_destructor);
260         DLIST_ADD(ctdb->freeze_handle->waiters, w);
261
262         /* we won't reply till later */
263         *async_reply = True;
264         return 0;
265 }
266
267
268 /*
269   block until we are frozen, used during daemon startup
270  */
271 bool ctdb_blocking_freeze(struct ctdb_context *ctdb)
272 {
273         ctdb_start_freeze(ctdb);
274
275         /* block until frozen */
276         while (ctdb->freeze_mode == CTDB_FREEZE_PENDING) {
277                 event_loop_once(ctdb->ev);
278         }
279
280         return ctdb->freeze_mode == CTDB_FREEZE_FROZEN;
281 }
282
283
284
285 /*
286   thaw the databases
287  */
288 int32_t ctdb_control_thaw(struct ctdb_context *ctdb)
289 {
290         /* cancel any pending transactions */
291         if (ctdb->freeze_handle && ctdb->freeze_handle->transaction_started) {
292                 struct ctdb_db_context *ctdb_db;
293
294                 for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
295                         tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
296                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
297                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
298                                          ctdb_db->db_name));
299                         }
300                         tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
301                 }
302         }
303
304 #if 0
305         /* this hack can be used to get a copy of the databases at the end of a recovery */
306         system("mkdir -p /var/ctdb.saved; /usr/bin/rsync --delete -a /var/ctdb/ /var/ctdb.saved/$$ 2>&1 > /dev/null");
307 #endif
308
309 #if 0
310         /* and this one for local testing */
311         system("mkdir -p test.db.saved; /usr/bin/rsync --delete -a test.db/ test.db.saved/$$ 2>&1 > /dev/null");
312 #endif
313
314
315         talloc_free(ctdb->freeze_handle);
316         ctdb->freeze_handle = NULL;
317         ctdb_call_resend_all(ctdb);
318         return 0;
319 }
320
321
322 /*
323   start a transaction on all databases - used for recovery
324  */
325 int32_t ctdb_control_transaction_start(struct ctdb_context *ctdb, uint32_t id)
326 {
327         struct ctdb_db_context *ctdb_db;
328
329         if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
330                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
331                 return -1;
332         }
333
334
335         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
336                 int ret;
337
338                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
339
340                 if (ctdb->freeze_handle->transaction_started) {
341                         if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
342                                 DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
343                                          ctdb_db->db_name));
344                                 /* not a fatal error */
345                         }
346                 }
347
348                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NO_NESTING);
349                 ret = tdb_transaction_start(ctdb_db->ltdb->tdb);
350
351                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
352
353                 if (ret != 0) {
354                         DEBUG(DEBUG_ERR,(__location__ " Failed to start transaction for db '%s'\n",
355                                  ctdb_db->db_name));
356                         return -1;
357                 }
358         }
359
360         ctdb->freeze_handle->transaction_started = true;
361         ctdb->freeze_handle->transaction_id = id;
362
363         return 0;
364 }
365
366 /*
367   commit transactions on all databases
368  */
369 int32_t ctdb_control_transaction_commit(struct ctdb_context *ctdb, uint32_t id)
370 {
371         struct ctdb_db_context *ctdb_db;
372
373         if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
374                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
375                 return -1;
376         }
377
378         if (!ctdb->freeze_handle->transaction_started) {
379                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
380                 return -1;
381         }
382
383         if (id != ctdb->freeze_handle->transaction_id) {
384                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", id));
385                 return -1;
386         }
387
388         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
389                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
390                 if (tdb_transaction_commit(ctdb_db->ltdb->tdb) != 0) {
391                         DEBUG(DEBUG_ERR,(__location__ " Failed to commit transaction for db '%s'. Cancel all transactions and resetting transaction_started to false.\n",
392                                  ctdb_db->db_name));
393
394                         /* cancel any pending transactions */
395                         for (ctdb_db=ctdb->db_list;ctdb_db;ctdb_db=ctdb_db->next) {
396                                 tdb_add_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
397                                 if (tdb_transaction_cancel(ctdb_db->ltdb->tdb) != 0) {
398                                         DEBUG(DEBUG_ERR,(__location__ " Failed to cancel transaction for db '%s'\n",
399                                                  ctdb_db->db_name));
400                                 }
401                                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
402                         }
403                         ctdb->freeze_handle->transaction_started = false;
404
405                         return -1;
406                 }
407                 tdb_remove_flags(ctdb_db->ltdb->tdb, TDB_NOLOCK);
408         }
409
410         ctdb->freeze_handle->transaction_started = false;
411         ctdb->freeze_handle->transaction_id = 0;
412
413         return 0;
414 }
415
416 /*
417   wipe a database - only possible when in a frozen transaction
418  */
419 int32_t ctdb_control_wipe_database(struct ctdb_context *ctdb, TDB_DATA indata)
420 {
421         struct ctdb_control_wipe_database w = *(struct ctdb_control_wipe_database *)indata.dptr;
422         struct ctdb_db_context *ctdb_db;
423
424         if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) {
425                 DEBUG(DEBUG_ERR,(__location__ " Failed transaction_start while not frozen\n"));
426                 return -1;
427         }
428
429         if (!ctdb->freeze_handle->transaction_started) {
430                 DEBUG(DEBUG_ERR,(__location__ " transaction not started\n"));
431                 return -1;
432         }
433
434         if (w.transaction_id != ctdb->freeze_handle->transaction_id) {
435                 DEBUG(DEBUG_ERR,(__location__ " incorrect transaction id 0x%x in commit\n", w.transaction_id));
436                 return -1;
437         }
438
439         ctdb_db = find_ctdb_db(ctdb, w.db_id);
440         if (!ctdb_db) {
441                 DEBUG(DEBUG_ERR,(__location__ " Unknown db 0x%x\n", w.db_id));
442                 return -1;
443         }
444
445         if (tdb_wipe_all(ctdb_db->ltdb->tdb) != 0) {
446                 DEBUG(DEBUG_ERR,(__location__ " Failed to wipe database for db '%s'\n",
447                          ctdb_db->db_name));
448                 return -1;
449         }
450
451         return 0;
452 }