From ef6cd76866bfaff3f462ef71f9dba028fac4f3ae Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg
Date: Wed, 23 Feb 2011 15:46:36 +1100
Subject: [PATCH] Deferred attach : at early startup, defer any db attach calls until we are out of recovery.

---
 include/ctdb_private.h    |  11 +++-
 server/ctdb_control.c     |   4 +-
 server/ctdb_ltdb_server.c | 111 ++++++++++++++++++++++++++++++++++----
 server/ctdb_recover.c     |  10 ++++
 server/ctdb_tunables.c    |   3 +-
 5 files changed, 124 insertions(+), 15 deletions(-)

diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 4dcf9a5b..c1499b46 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -118,6 +118,7 @@ struct ctdb_tunable {
 	uint32_t use_status_events_for_monitoring;
 	uint32_t allow_unhealthy_db_read;
 	uint32_t stat_history_interval;
+	uint32_t deferred_attach_timeout;
 };
 
 /*
@@ -488,6 +489,9 @@ struct ctdb_context {
 
 	/* used in the recovery daemon to remember the ip allocation */
 	struct trbt_tree *ip_tree;
+
+	/* Used to defer db attach requests while in recovery mode */
+	struct ctdb_deferred_attach_context *deferred_attach;
 };
 
 struct ctdb_db_context {
@@ -799,7 +803,10 @@ int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
 			     void *private_data);
 
 int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
-			       TDB_DATA *outdata, uint64_t tdb_flags, bool persistent);
+			       TDB_DATA *outdata, uint64_t tdb_flags,
+			       bool persistent, uint32_t client_id,
+			       struct ctdb_req_control *c,
+			       bool *async_reply);
 
 int ctdb_daemon_set_call(struct ctdb_context *ctdb, uint32_t db_id,
 			 ctdb_fn_t fn, int id);
@@ -1362,4 +1369,6 @@ int32_t ctdb_control_get_stat_history(struct ctdb_context *ctdb,
 
 int ctdb_deferred_drop_all_ips(struct ctdb_context *ctdb);
 
+int ctdb_process_deferred_attach(struct ctdb_context *ctdb);
+
 #endif
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 90900c94..69724e39 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -221,10 +221,10 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 	}
 
 	case CTDB_CONTROL_DB_ATTACH:
-		return ctdb_control_db_attach(ctdb, indata, outdata, srvid, false);
+		return ctdb_control_db_attach(ctdb, indata, outdata, srvid, false, client_id, c, async_reply);
 
 	case CTDB_CONTROL_DB_ATTACH_PERSISTENT:
-		return ctdb_control_db_attach(ctdb, indata, outdata, srvid, true);
+		return ctdb_control_db_attach(ctdb, indata, outdata, srvid, true, client_id, c, async_reply);
 
 	case CTDB_CONTROL_SET_CALL: {
 		struct ctdb_control_set_call *sc =
diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c
index ba2a9cb4..3e90b2d0 100644
--- a/server/ctdb_ltdb_server.c
+++ b/server/ctdb_ltdb_server.c
@@ -745,33 +745,122 @@ again:
 }
 
 
+/* A deferred db attach request; linked into ctdb->deferred_attach. */
+struct ctdb_deferred_attach_context {
+	struct ctdb_deferred_attach_context *next, *prev;
+	struct ctdb_context *ctdb;
+	struct ctdb_req_control *c;
+};
+
+
+/* talloc destructor: unlink the request from ctdb->deferred_attach. */
+static int ctdb_deferred_attach_destructor(struct ctdb_deferred_attach_context *da_ctx)
+{
+	DLIST_REMOVE(da_ctx->ctdb->deferred_attach, da_ctx);
+
+	return 0;
+}
+
+/* Timed event handler: reply -1 to the deferred control and free it. */
+static void ctdb_deferred_attach_timeout(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data)
+{
+	struct ctdb_deferred_attach_context *da_ctx = talloc_get_type(private_data, struct ctdb_deferred_attach_context);
+	struct ctdb_context *ctdb = da_ctx->ctdb;
+
+	ctdb_request_control_reply(ctdb, da_ctx->c, NULL, -1, NULL);
+	talloc_free(da_ctx);
+}
+
+/* Timed event handler: feed the deferred control packet to ctdb_input_pkt(). */
+static void ctdb_deferred_attach_callback(struct event_context *ev, struct timed_event *te, struct timeval t, void *private_data)
+{
+	struct ctdb_deferred_attach_context *da_ctx = talloc_get_type(private_data, struct ctdb_deferred_attach_context);
+	struct ctdb_context *ctdb = da_ctx->ctdb;
+
+	/* This talloc-steals the packet ->c */
+	ctdb_input_pkt(ctdb, (struct ctdb_req_header *)da_ctx->c);
+	talloc_free(da_ctx);
+}
+
+/* Schedule a replay of every request on ctdb->deferred_attach. */
+int ctdb_process_deferred_attach(struct ctdb_context *ctdb)
+{
+	struct ctdb_deferred_attach_context *da_ctx;
+
+	/* Replay each request from the main event loop one second from now.
+	   The event is parented to da_ctx (a talloc child of the client) so it
+	   is cancelled if da_ctx is freed first, instead of firing on freed memory. */
+	while ((da_ctx = ctdb->deferred_attach) != NULL) {
+		DLIST_REMOVE(ctdb->deferred_attach, da_ctx);
+		event_add_timed(ctdb->ev, da_ctx, timeval_current_ofs(1,0), ctdb_deferred_attach_callback, da_ctx);
+	}
+
+	return 0;
+}
+
 /*
   a client has asked to attach a new database
  */
 int32_t ctdb_control_db_attach(struct ctdb_context *ctdb, TDB_DATA indata,
 			       TDB_DATA *outdata, uint64_t tdb_flags,
-			       bool persistent)
+			       bool persistent, uint32_t client_id,
+			       struct ctdb_req_control *c,
+			       bool *async_reply)
 {
 	const char *db_name = (const char *)indata.dptr;
 	struct ctdb_db_context *db;
 	struct ctdb_node *node = ctdb->nodes[ctdb->pnn];
 
+	/* don't allow any local clients to attach while we are in recovery mode
+	 * except for the recovery daemon.
+	 * allow all attach from the network since these are always from remote
+	 * recovery daemons.
+	 */
+	if (client_id != 0) {
+		struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client);
+
+		if (client == NULL) {
+			DEBUG(DEBUG_ERR,("DB Attach to database %s refused. Can not match clientid:%u to a client structure.\n", db_name, client_id));
+			return -1;
+		}
+
+		/* If the node is inactive it is not part of the cluster
+		   and we should not allow clients to attach to any
+		   databases
+		*/
+		if (node->flags & NODE_FLAGS_INACTIVE) {
+			DEBUG(DEBUG_ERR,("DB Attach to database %s refused since node is inactive (disconnected or banned)\n", db_name));
+			return -1;
+		}
+
+		if (ctdb->recovery_mode == CTDB_RECOVERY_ACTIVE
+		    && client->pid != ctdb->recoverd_pid) {
+			struct ctdb_deferred_attach_context *da_ctx = talloc(client, struct ctdb_deferred_attach_context);
+
+			if (da_ctx == NULL) {
+				DEBUG(DEBUG_ERR,("DB Attach to database %s deferral for client with pid:%d failed due to OOM.\n", db_name, client->pid));
+				return -1;
+			}
+
+			da_ctx->ctdb = ctdb;
+			da_ctx->c = talloc_steal(da_ctx, c);
+			talloc_set_destructor(da_ctx, ctdb_deferred_attach_destructor);
+			DLIST_ADD(ctdb->deferred_attach, da_ctx);
+
+			event_add_timed(ctdb->ev, da_ctx, timeval_current_ofs(ctdb->tunable.deferred_attach_timeout, 0), ctdb_deferred_attach_timeout, da_ctx);
+
+			DEBUG(DEBUG_ERR,("DB Attach to database %s deferred for client with pid:%d since node is in recovery mode.\n", db_name, client->pid));
+			*async_reply = true;
+			return 0;
+		}
+	}
+
 	/* the client can optionally pass additional tdb flags, but we
 	   only allow a subset of those on the database in ctdb. Note
 	   that tdb_flags is passed in via the (otherwise unused)
 	   srvid to the attach control */
 	tdb_flags &= (TDB_NOSYNC|TDB_INCOMPATIBLE_HASH);
 
-	/* If the node is inactive it is not part of the cluster
-	   and we should not allow clients to attach to any
-	   databases
-	*/
-	if (node->flags & NODE_FLAGS_INACTIVE) {
-		DEBUG(DEBUG_ERR,("DB Attach to database %s refused since node is inactive (disconnected or banned)\n", db_name));
-		return -1;
-	}
-
-
 	/* see if we already have this name */
 	db = ctdb_db_handle(ctdb, db_name);
 	if (db) {
diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c
index 4db4d97f..8f79f846 100644
--- a/server/ctdb_recover.c
+++ b/server/ctdb_recover.c
@@ -630,6 +630,11 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde,
 
 	state->ctdb->recovery_mode = state->recmode;
 
+	/* release any deferred attach calls from clients */
+	if (state->recmode == CTDB_RECOVERY_NORMAL) {
+		ctdb_process_deferred_attach(state->ctdb);
+	}
+
 	ctdb_request_control_reply(state->ctdb, state->c, NULL, 0, NULL);
 	talloc_free(state);
 	return;
@@ -716,6 +721,11 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb,
 	state->fd[0] = -1;
 	state->fd[1] = -1;
 
+	/* release any deferred attach calls from clients */
+	if (recmode == CTDB_RECOVERY_NORMAL) {
+		ctdb_process_deferred_attach(ctdb);
+	}
+
 	if (ctdb->tunable.verify_recovery_lock == 0) {
 		/* dont need to verify the reclock file */
 		ctdb->recovery_mode = recmode;
diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c
index 4cd1b457..0f8d7c8c 100644
--- a/server/ctdb_tunables.c
+++ b/server/ctdb_tunables.c
@@ -65,7 +65,8 @@ static const struct {
 	{ "MaxQueueDropMsg", 1000000, offsetof(struct ctdb_tunable, max_queue_depth_drop_msg) },
 	{ "UseStatusEvents", 0, offsetof(struct ctdb_tunable, use_status_events_for_monitoring) },
 	{ "AllowUnhealthyDBRead", 0, offsetof(struct ctdb_tunable, allow_unhealthy_db_read) },
-	{ "StatHistoryInterval", 1, offsetof(struct ctdb_tunable, stat_history_interval) }
+	{ "StatHistoryInterval", 1, offsetof(struct ctdb_tunable, stat_history_interval) },
+	{ "DeferredAttachTO", 120, offsetof(struct ctdb_tunable, deferred_attach_timeout) }
 };
 
 /*
-- 
2.34.1