From 9eb9c53ef29f4871ae2fe62fc5cb6145fca89eed Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 10 Jun 2010 08:58:55 +0930
Subject: [PATCH] Delay reusing ids to make protocol more robust

Ronnie and I tracked down a bug which seems to be caused by a node
running so slowly that we timed out the request and reused the request
id before it responded.

The result was that we unlocked the wrong record, leading to the
following:

	ctdbd: tdb_unlock: count is 0
	ctdbd: tdb_chainunlock failed
	smbd[1630912]: [2010/06/08 15:32:28.251716, 0] lib/util_sock.c:1491(get_peer_addr_internal)
	ctdbd: Could not find idr:43
	ctdbd: server/ctdb_call.c:492 reqid 43 not found

This exact problem is now detected, but in general we want to delay
id reuse as long as possible to make our system more robust.

Signed-off-by: Rusty Russell
---
 client/ctdb_client.c   | 2 ++
 common/ctdb_util.c     | 8 +++++++-
 include/ctdb_private.h | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 05e54a85..cb113f41 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -2916,6 +2916,8 @@ struct ctdb_context *ctdb_init(struct event_context *ev)
 	}
 	ctdb->ev = ev;
 	ctdb->idr = idr_init(ctdb);
+	/* Wrap early to exercise code. */
+	ctdb->lastid = INT_MAX-2;
 	CTDB_NO_MEMORY_NULL(ctdb, ctdb->idr);
 
 	ret = ctdb_set_socketname(ctdb, CTDB_PATH);
diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index f73f8b02..b928c7ed 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -159,7 +159,13 @@ void ctdb_reclock_latency(struct ctdb_context *ctdb, const char *name, double *l
 
 uint32_t ctdb_reqid_new(struct ctdb_context *ctdb, void *state)
 {
-	return idr_get_new(ctdb->idr, state, INT_MAX);
+	int id = idr_get_new_above(ctdb->idr, state, ctdb->lastid+1, INT_MAX);
+	if (id < 0) {
+		DEBUG(DEBUG_NOTICE, ("Reqid wrap!\n"));
+		id = idr_get_new(ctdb->idr, state, INT_MAX);
+	}
+	ctdb->lastid = id;
+	return id;
 }
 
 void *_ctdb_reqid_find(struct ctdb_context *ctdb, uint32_t reqid, const char *type, const char *location)
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 16e5d60c..642a55e2 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -412,7 +412,7 @@ struct ctdb_context {
 	unsigned flags;
 	uint32_t capabilities;
 	struct idr_context *idr;
-	uint16_t idr_cnt;
+	int lastid;
 	struct ctdb_node **nodes; /* array of nodes in the cluster - indexed by vnn */
 	struct ctdb_vnn *vnn; /* list of public ip addresses and interfaces */
 	struct ctdb_vnn *single_ip_vnn; /* a structure for the single ip */
-- 
2.34.1