2 Unix SMB/CIFS implementation.
3 global locks based on dbwrap and messaging
4 Copyright (C) 2009 by Volker Lendecke
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
/*
 * Forward declaration. g_lock_force_unlock() is defined further down in this
 * file but is already needed by g_lock_trylock(), which calls it for a
 * conflicting entry whose process no longer exists.
 */
23 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
24 struct server_id pid);
/* Lock database handle; set from db_open() in g_lock_ctx_init(). */
27 struct db_context *db;
/* Messaging context used to register for and send MSG_DBWRAP_G_LOCK_RETRY. */
28 struct messaging_context *msg;
32 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
33 * lockname. The record contains an array of "struct g_lock_rec"
34 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
38 enum g_lock_type lock_type;
/*
 * Create a g_lock context.
 *
 * Allocates a struct g_lock_ctx with talloc on mem_ctx and opens the
 * g_lock.tdb database (path obtained from lock_path()) with
 * TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT and mode 0700.
 *
 * NOTE(review): the error returns and the assignment of msg are not visible
 * in this extract; presumably NULL is returned when the allocation or
 * db_open() fails -- confirm against the full source.
 */
42 struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
43 struct messaging_context *msg)
45 struct g_lock_ctx *result;
47 result = talloc(mem_ctx, struct g_lock_ctx);
/* db_open() is handed the new context as its first argument. */
53 result->db = db_open(result, lock_path("g_lock.tdb"), 0,
54 TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
55 if (result->db == NULL) {
/* NOTE(review): this message lacks a trailing newline, unlike the other DEBUG calls here. */
56 DEBUG(1, ("g_lock_init: Could not open g_lock.tdb"));
/*
 * Decide whether the existing record rec blocks a request of type lock_type.
 *
 * Two cases are singled out below: records that carry the G_LOCK_PENDING bit,
 * and a read request meeting a read record.
 *
 * NOTE(review): the return statements are not visible in this extract;
 * presumably both special cases mean "no conflict" and everything else
 * conflicts -- confirm against the full source.
 */
63 static bool g_lock_conflicts(enum g_lock_type lock_type,
64 const struct g_lock_rec *rec)
66 enum g_lock_type rec_lock = rec->lock_type;
68 if ((rec_lock & G_LOCK_PENDING) != 0) {
73 * Only tested write locks so far. Very likely this routine
74 * needs to be fixed for read locks....
76 if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
/*
 * Unpack a database value into a talloc array of struct g_lock_rec.
 *
 * data.dsize must be a whole multiple of sizeof(struct g_lock_rec); anything
 * else is logged as an invalid record length. The records are copied into a
 * new array allocated on mem_ctx and listed at debug level 10. Entries that
 * are not pending and whose process no longer exists are discarded by
 * copying the last array element over them. The resulting count is written
 * to *pnum_locks.
 *
 * NOTE(review): the lines that adjust num_locks and i after a discard, the
 * assignment to *plocks and the return statements are not visible in this
 * extract.
 */
82 static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
83 int *pnum_locks, struct g_lock_rec **plocks)
86 struct g_lock_rec *locks;
/* A well-formed value is an exact multiple of the record size. */
88 if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
89 DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
93 num_locks = data.dsize / sizeof(struct g_lock_rec);
94 locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
96 DEBUG(1, ("talloc failed\n"));
100 memcpy(locks, data.dptr, data.dsize);
102 DEBUG(10, ("locks:\n"));
103 for (i=0; i<num_locks; i++) {
104 DEBUGADD(10, ("%s: %s %s\n",
105 procid_str(debug_ctx(), &locks[i].pid),
106 ((locks[i].lock_type & 1) == G_LOCK_READ) ?
108 (locks[i].lock_type & G_LOCK_PENDING) ?
109 "(pending)" : "(owner)"));
/* Only non-pending entries (owners) are tested for a dead process here. */
111 if (((locks[i].lock_type & G_LOCK_PENDING) == 0)
112 && !process_exists(locks[i].pid)) {
114 DEBUGADD(10, ("lock owner %s died -- discarding\n",
115 procid_str(talloc_tos(),
/* Fill the hole with the last element; array order is not preserved. */
118 if (i < (num_locks-1)) {
119 locks[i] = locks[num_locks-1];
126 *pnum_locks = num_locks;
/*
 * Drop entries of processes that no longer exist from locks[].
 *
 * Every entry is tested with process_exists(); the G_LOCK_PENDING bit is not
 * consulted here. An entry whose process is gone is logged and overwritten
 * with the last array element. The count in *pnum_locks is read on entry and
 * written back at the end.
 *
 * NOTE(review): the lines that shrink num_locks and re-test index i after a
 * discard are not visible in this extract.
 */
130 static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks)
134 num_locks = *pnum_locks;
136 DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks));
138 for (i=0; i<num_locks; i++) {
139 if (process_exists(locks[i].pid)) {
142 DEBUGADD(10, ("%s does not exist -- discarding\n",
143 procid_str(debug_ctx(), &locks[i].pid)));
/* Move the last element into the freed slot. */
145 if (i < (num_locks-1)) {
146 locks[i] = locks[num_locks-1];
150 *pnum_locks = num_locks;
/*
 * Append a record for (pid, lock_type) to the locks array.
 *
 * The array is resized with talloc_realloc() on mem_ctx and the new entry is
 * written at index *pnum_locks, i.e. the count on entry.
 *
 * NOTE(review): the new size passed to talloc_realloc(), the update of
 * *pnum_locks and the return statements are not visible in this extract.
 * g_lock_trylock() reports a failure of this function as
 * NT_STATUS_NO_MEMORY.
 */
154 static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
155 struct g_lock_rec *locks,
157 const struct server_id pid,
158 enum g_lock_type lock_type)
160 struct g_lock_rec *result;
161 int num_locks = *pnum_locks;
163 result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
165 if (result == NULL) {
/* The new record goes into the slot just past the old end. */
169 result[num_locks].pid = pid;
170 result[num_locks].lock_type = lock_type;
/*
 * Forward declaration; the definition further down logs the sender of a
 * retry message.
 * NOTE(review): presumably this is the callback handed to
 * messaging_register() in g_lock_lock() -- that argument is not visible in
 * this extract.
 */
175 static void g_lock_got_retry(struct messaging_context *msg,
178 struct server_id server_id,
/*
 * Forward declaration of g_lock_timedout(), defined further down. The
 * parameters (event context, timer, current time) look like a tevent timer
 * callback -- NOTE(review): no call site is visible in this extract.
 */
180 static void g_lock_timedout(struct tevent_context *ev,
181 struct tevent_timer *te,
182 struct timeval current_time,
/*
 * Make a single attempt to take the lock called name with lock_type.
 *
 * The record for name is fetched with fetch_locked() and unpacked with
 * g_lock_parse(). The entries are then scanned:
 *  - our own entry (matched with procid_equal() against procid_self()) may
 *    appear only once and must carry G_LOCK_PENDING; otherwise
 *    NT_STATUS_INTERNAL_ERROR is set;
 *  - for an entry that conflicts according to g_lock_conflicts(), a holder
 *    whose process no longer exists is removed through
 *    g_lock_force_unlock(), and G_LOCK_PENDING is ORed into lock_type.
 * Afterwards our entry is appended (first attempt) or its lock_type is
 * overwritten (retry). If the lock was obtained, dead entries are pruned
 * with g_lock_cleanup(). The array is then stored back into the record.
 *
 * Returns STATUS_PENDING when no error occurred and our entry is pending.
 * Visible error codes: NT_STATUS_LOCK_NOT_GRANTED (fetch_locked failed),
 * NT_STATUS_INTERNAL_ERROR, NT_STATUS_NO_MEMORY, or the store() status.
 *
 * NOTE(review): several control-flow lines, the cleanup code and the final
 * return are not visible in this extract; g_lock_lock() treats NT_STATUS_OK
 * from this function as "lock acquired".
 */
185 static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
186 enum g_lock_type lock_type)
188 struct db_record *rec = NULL;
189 struct g_lock_rec *locks = NULL;
191 struct server_id self;
194 NTSTATUS status = NT_STATUS_OK;
195 NTSTATUS store_status;
/* Fetch the record for this lock name via fetch_locked(). */
198 rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
199 string_term_tdb_data(name));
201 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
202 status = NT_STATUS_LOCK_NOT_GRANTED;
206 if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
207 DEBUG(10, ("g_lock_parse for %s failed\n", name));
208 status = NT_STATUS_INTERNAL_ERROR;
212 self = procid_self();
/* Locate our own entry and look for conflicting entries of others. */
215 for (i=0; i<num_locks; i++) {
216 if (procid_equal(&self, &locks[i].pid)) {
217 if (our_index != -1) {
218 DEBUG(1, ("g_lock_trylock: Added ourself "
220 status = NT_STATUS_INTERNAL_ERROR;
223 if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
224 DEBUG(1, ("g_lock_trylock: Found ourself not "
226 status = NT_STATUS_INTERNAL_ERROR;
232 /* never conflict with ourself */
235 if (g_lock_conflicts(lock_type, &locks[i])) {
236 struct server_id pid = locks[i].pid;
238 if (!process_exists(pid)) {
241 status = g_lock_force_unlock(ctx, name, pid);
242 if (!NT_STATUS_IS_OK(status)) {
243 DEBUG(1, ("Could not unlock dead lock "
249 lock_type |= G_LOCK_PENDING;
253 if (our_index == -1) {
254 /* First round, add ourself */
256 locks = g_lock_addrec(talloc_tos(), locks, &num_locks,
259 DEBUG(10, ("g_lock_addrec failed\n"));
260 status = NT_STATUS_NO_MEMORY;
265 * Retry. We were pending last time. Overwrite the
266 * stored lock_type with what we calculated, we might
267 * have acquired the lock this time.
269 locks[our_index].lock_type = lock_type;
272 if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) {
274 * Walk through the list of locks, search for dead entries
276 g_lock_cleanup(&num_locks, locks);
/* Write the array back as the new record value. */
279 data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks));
280 store_status = rec->store(rec, data, 0);
281 if (!NT_STATUS_IS_OK(store_status)) {
282 DEBUG(1, ("rec->store failed: %s\n",
283 nt_errstr(store_status)));
284 status = store_status;
291 if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) {
292 return STATUS_PENDING;
/*
 * Acquire the lock called name with lock_type, waiting up to timeout.
 *
 * Steps visible in this function:
 *  - lock_type is rejected with NT_STATUS_INVALID_PARAMETER if any bit other
 *    than bit 0 is set;
 *  - with CLUSTER_SUPPORT and lp_clustering(), ctdb_watch_us() is called on
 *    the ctdbd connection;
 *  - a handler for MSG_DBWRAP_G_LOCK_RETRY is registered on ctx->msg;
 *  - the deadline timeout_end is computed as now + timeout;
 *  - g_lock_trylock() is called: NT_STATUS_OK means the lock was obtained,
 *    any status other than STATUS_PENDING is a failure;
 *  - while pending, the remaining time is computed and sys_select() is
 *    called (on the ctdbd fd when clustering). A select error other than
 *    EINTR and a return value other than 0 or 1 yield
 *    NT_STATUS_INTERNAL_ERROR; a return of 0 after the deadline yields
 *    NT_STATUS_LOCK_NOT_GRANTED;
 *  - on failure our entry is removed again with g_lock_unlock();
 *  - the message handler is deregistered before returning.
 *
 * NOTE(review): the loop construct, the timeout argument of sys_select()
 * and the return statement are not visible in this extract.
 * NOTE(review): when g_lock_unlock() fails, the message near the end prints
 * nt_errstr(status) rather than nt_errstr(unlock_status) -- looks like the
 * wrong variable; confirm and fix in the full source.
 * NOTE(review): the debug string for an early select return of 0 repeats
 * the word not.
 */
298 NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
299 enum g_lock_type lock_type, struct timeval timeout)
301 struct tevent_timer *te = NULL;
304 struct timeval timeout_end;
305 struct timeval timeout_remaining;
306 struct timeval time_now;
308 DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
/* Callers may only pass values that fit in bit 0. */
311 if (lock_type & ~1) {
312 DEBUG(1, ("Got invalid lock type %d for %s\n",
313 (int)lock_type, name));
314 return NT_STATUS_INVALID_PARAMETER;
317 #ifdef CLUSTER_SUPPORT
318 if (lp_clustering()) {
319 status = ctdb_watch_us(messaging_ctdbd_connection());
320 if (!NT_STATUS_IS_OK(status)) {
321 DEBUG(10, ("could not register retry with ctdb: %s\n",
328 status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
330 if (!NT_STATUS_IS_OK(status)) {
331 DEBUG(10, ("messaging_register failed: %s\n",
/* Absolute deadline: current time plus the caller's timeout. */
336 time_now = timeval_current();
337 timeout_end = timeval_sum(&time_now, &timeout);
341 fd_set *r_fds = NULL;
345 status = g_lock_trylock(ctx, name, lock_type);
346 if (NT_STATUS_IS_OK(status)) {
347 DEBUG(10, ("Got lock %s\n", name));
350 if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
351 DEBUG(10, ("g_lock_trylock failed: %s\n",
356 DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));
358 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
359 * !!! HACK ALERT --- FIX ME !!!
360 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
361 * What we really want to do here is to react to
362 * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent
363 * by a client doing g_lock_unlock or by ourselves when
364 * we receive a CTDB_SRVID_SAMBA_NOTIFY or
365 * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when
366 * either a client holding a lock or a complete node
369 * Doing this properly involves calling tevent_loop_once(),
370 * but doing this here with the main ctdbd messaging context
371 * creates a nested event loop when g_lock_lock() is called
372 * from the main event loop, e.g. in a tcon_and_X where the
373 * share_info.tdb needs to be initialized and is locked by
374 * another process, or when the remore registry is accessed
375 * for writing and some other process already holds a lock
376 * on the registry.tdb.
378 * So as a quick fix, we act a little coarsely here: we do
379 * a select on the ctdb connection fd and when it is readable
380 * or we get EINTR, then we retry without actually parsing
381 * any ctdb packages or dispatching messages. This means that
382 * we retry more often than intended by design, but this does
383 * not harm and it is unobtrusive. When we have finished,
384 * the main loop will pick up all the messages and ctdb
385 * packets. The only extra twist is that we cannot use timed
386 * events here but have to handcode a timeout.
389 #ifdef CLUSTER_SUPPORT
390 if (lp_clustering()) {
391 struct ctdbd_connection *conn = messaging_ctdbd_connection();
395 max_fd = ctdbd_conn_get_fd(conn);
396 FD_SET(max_fd, r_fds);
400 time_now = timeval_current();
401 timeout_remaining = timeval_until(&time_now, &timeout_end);
403 ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
407 if (errno != EINTR) {
408 DEBUG(1, ("error calling select: %s\n",
410 status = NT_STATUS_INTERNAL_ERROR;
415 * This means a signal was received.
416 * It might have been a MSG_DBWRAP_G_LOCK_RETRY message.
419 } else if (ret == 0) {
420 if (timeval_expired(&timeout_end)) {
421 DEBUG(10, ("g_lock_lock timed out\n"));
422 status = NT_STATUS_LOCK_NOT_GRANTED;
425 DEBUG(10, ("select returned 0 but timeout not "
426 "not expired: strange - retrying\n"));
428 } else if (ret != 1) {
429 DEBUG(1, ("invalid return code of select: %d\n", ret));
430 status = NT_STATUS_INTERNAL_ERROR;
435 * This means ctdbd has sent us some data.
436 * Might be a CTDB_SRVID_RECONFIGURE or a
437 * CTDB_SRVID_SAMBA_NOTIFY message.
444 if (!NT_STATUS_IS_OK(status)) {
445 NTSTATUS unlock_status;
447 unlock_status = g_lock_unlock(ctx, name);
449 if (!NT_STATUS_IS_OK(unlock_status)) {
450 DEBUG(1, ("Could not remove ourself from the locking "
451 "db: %s\n", nt_errstr(status)));
455 messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);
/*
 * Handler for a retry message: private_data is interpreted as a bool
 * pointer and the sending server_id is logged at debug level 10.
 *
 * NOTE(review): the statement that uses pretry is not visible in this
 * extract; presumably it sets the flag to true -- confirm.
 */
461 static void g_lock_got_retry(struct messaging_context *msg,
464 struct server_id server_id,
467 bool *pretry = (bool *)private_data;
469 DEBUG(10, ("Got retry message from pid %s\n",
470 procid_str(talloc_tos(), &server_id)));
/*
 * Timer-style callback: private_data is interpreted as a bool pointer.
 *
 * NOTE(review): the statement that uses ptimedout is not visible in this
 * extract; presumably it sets the flag to true -- confirm.
 */
475 static void g_lock_timedout(struct tevent_context *ev,
476 struct tevent_timer *te,
477 struct timeval current_time,
480 bool *ptimedout = (bool *)private_data;
/*
 * Remove the entry of process pid from the lock called name and, if that
 * entry was a holder, ask waiters to retry.
 *
 * The record is fetched with fetch_locked() and unpacked with
 * g_lock_parse(). The entry matching pid is located with procid_equal(); if
 * none is found NT_STATUS_INTERNAL_ERROR is set. The entry is overwritten
 * with the last array element. The record is then deleted when no entries
 * remain, or stored back otherwise.
 *
 * If the removed entry did not carry G_LOCK_PENDING, the remaining pending
 * entries whose process still exists are sent MSG_DBWRAP_G_LOCK_RETRY.
 * The number of wake-ups is compared against 5 to avoid a thundering herd.
 *
 * NOTE(review): the decrement of num_locks, the loop exits, the cleanup
 * code and the return statement are not visible in this extract.
 */
485 static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
486 struct server_id pid)
488 struct db_record *rec = NULL;
489 struct g_lock_rec *locks = NULL;
491 enum g_lock_type lock_type;
494 rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
495 string_term_tdb_data(name));
497 DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
498 status = NT_STATUS_INTERNAL_ERROR;
502 if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
503 DEBUG(10, ("g_lock_parse for %s failed\n", name));
504 status = NT_STATUS_INTERNAL_ERROR;
/* Find the entry that belongs to pid. */
508 for (i=0; i<num_locks; i++) {
509 if (procid_equal(&pid, &locks[i].pid)) {
/* i reaching num_locks means the search found nothing. */
514 if (i == num_locks) {
515 DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
516 status = NT_STATUS_INTERNAL_ERROR;
/* Remember the removed entry's type; it decides whether waiters are woken. */
520 lock_type = locks[i].lock_type;
522 if (i < (num_locks-1)) {
523 locks[i] = locks[num_locks-1];
/* An empty lock list is represented by deleting the record. */
527 if (num_locks == 0) {
528 status = rec->delete_rec(rec);
531 data = make_tdb_data((uint8_t *)locks,
532 sizeof(struct g_lock_rec) * num_locks);
533 status = rec->store(rec, data, 0);
536 if (!NT_STATUS_IS_OK(status)) {
537 DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
542 if ((lock_type & G_LOCK_PENDING) == 0) {
546 * We've been the lock holder. Others to retry. Don't
547 * tell all others to avoid a thundering herd. In case
548 * this leads to a complete stall because we miss some
549 * processes, the loop in g_lock_lock tries at least
553 for (i=0; i<num_locks; i++) {
554 if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
557 if (!process_exists(locks[i].pid)) {
562 * Ping all waiters to retry
564 status = messaging_send(ctx->msg, locks[i].pid,
565 MSG_DBWRAP_G_LOCK_RETRY,
567 if (!NT_STATUS_IS_OK(status)) {
568 DEBUG(1, ("sending retry to %s failed: %s\n",
569 procid_str(debug_ctx(),
575 if (num_wakeups > 5) {
/*
 * Release this process's entry on the lock called name.
 *
 * Delegates to g_lock_force_unlock() with procid_self(). When built with
 * CLUSTER_SUPPORT and clustering is enabled, ctdb_unwatch() is also called
 * on the ctdbd connection.
 *
 * NOTE(review): the return statement is not visible in this extract;
 * presumably the status of g_lock_force_unlock() is returned.
 */
587 NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
591 status = g_lock_force_unlock(ctx, name, procid_self());
593 #ifdef CLUSTER_SUPPORT
594 if (lp_clustering()) {
595 ctdb_unwatch(messaging_ctdbd_connection());
/*
 * Traverse state for g_lock_locks(): carries the per-lock callback that
 * g_lock_locks_fn() invokes. NOTE(review): a private_data member is used by
 * the code below, but its declaration is not visible in this extract.
 */
601 struct g_lock_locks_state {
602 int (*fn)(const char *name, void *private_data);
/*
 * Per-record callback for the traverse started by g_lock_locks().
 *
 * A key that is empty or does not end in a 0 byte is logged as invalid.
 * Otherwise the key is passed as a C string, together with
 * state->private_data, to the callback stored in the state, and that
 * callback's result is returned.
 *
 * NOTE(review): the return value for an invalid key is not visible in this
 * extract.
 */
606 static int g_lock_locks_fn(struct db_record *rec, void *priv)
608 struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;
610 if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
611 DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
614 return state->fn((char *)rec->key.dptr, state->private_data);
/*
 * Run a read-only traverse over the lock database, using g_lock_locks_fn()
 * as the per-record callback and a g_lock_locks_state as its argument.
 *
 * Returns whatever traverse_read() returns.
 * NOTE(review): the assignment of state.fn is not visible in this extract;
 * presumably it is set to fn.
 */
617 int g_lock_locks(struct g_lock_ctx *ctx,
618 int (*fn)(const char *name, void *private_data),
621 struct g_lock_locks_state state;
624 state.private_data = private_data;
626 return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
/*
 * Call fn for every entry of the lock called name.
 *
 * The record is read with fetch(); NT_STATUS_NOT_FOUND is returned on the
 * visible failure path of that call. The value is unpacked with
 * g_lock_parse(), the fetched buffer is freed, and a parse failure yields
 * NT_STATUS_INTERNAL_ERROR. fn then receives the pid and lock_type of each
 * entry together with private_data; its result is compared against 0.
 *
 * NOTE(review): the handling of an empty value, the reaction to a non-zero
 * result from fn and the final return are not visible in this extract.
 */
629 NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
630 int (*fn)(struct server_id pid,
631 enum g_lock_type lock_type,
637 struct g_lock_rec *locks = NULL;
640 if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
642 return NT_STATUS_NOT_FOUND;
645 if ((data.dsize == 0) || (data.dptr == NULL)) {
649 ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);
/* g_lock_parse() works on its own copy, so the fetched buffer can go. */
651 TALLOC_FREE(data.dptr);
654 DEBUG(10, ("g_lock_parse for %s failed\n", name));
655 return NT_STATUS_INTERNAL_ERROR;
658 for (i=0; i<num_locks; i++) {
659 if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
/*
 * State passed from g_lock_get() to g_lock_get_fn() through g_lock_dump().
 * NOTE(review): only the pid member is visible in this extract; presumably
 * it points at the caller's result slot -- confirm.
 */
667 struct g_lock_get_state {
669 struct server_id *pid;
/*
 * g_lock_dump() callback used by g_lock_get(). It tests whether the entry
 * carries the G_LOCK_PENDING bit.
 *
 * NOTE(review): the rest of the body is not visible in this extract;
 * presumably pending entries are skipped and the pid of a non-pending entry
 * is recorded in the state -- confirm.
 */
672 static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
675 struct g_lock_get_state *state = (struct g_lock_get_state *)priv;
677 if ((lock_type & G_LOCK_PENDING) != 0) {
/*
 * Query the lock called name by running g_lock_dump() with g_lock_get_fn()
 * and a g_lock_get_state.
 *
 * A failure status from g_lock_dump() is checked for; one visible path
 * returns NT_STATUS_NOT_FOUND.
 *
 * NOTE(review): the initialisation of state, the condition guarding
 * NT_STATUS_NOT_FOUND and the success return are not visible in this
 * extract; presumably *pid receives the pid of a non-pending entry.
 */
686 NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
687 struct server_id *pid)
689 struct g_lock_get_state state;
695 status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
696 if (!NT_STATUS_IS_OK(status)) {
700 return NT_STATUS_NOT_FOUND;