s3: Add parameter "ctdb timeout"

author Volker Lendecke <vl@samba.org>

Tue, 3 Nov 2009 04:41:02 +0000 (05:41 +0100)

committer Michael Adam <obnox@samba.org>

Wed, 10 Mar 2010 12:22:13 +0000 (13:22 +0100)
author Volker Lendecke <vl@samba.org>
Tue, 3 Nov 2009 04:41:02 +0000 (05:41 +0100)
committer Michael Adam <obnox@samba.org>
Wed, 10 Mar 2010 12:22:13 +0000 (13:22 +0100)
diff --git a/docs-xml/smbdotconf/misc/ctdbtimeout.xml b/docs-xml/smbdotconf/misc/ctdbtimeout.xml

new file mode 100644 (file)

index 0000000..97d5039
--- /dev/null
+++ b/docs-xml/smbdotconf/misc/ctdbtimeout.xml
@@ -0,0 +1,37 @@
+<samba:parameter name="ctdb timeout"
+                 context="G"
+                                type="integer"
+                 advanced="1"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+<description>
+       <para>This parameter specifies a timeout in seconds for the
+         connection between Samba and ctdb. It is only valid if you
+         have compiled Samba with clustering and if you have
+         set <parameter>clustering=yes</parameter>.
+       </para>
+       <para>When something in the cluster blocks, it can happen that
+         we wait indefinitely long for ctdb, just adding to the
+         blocking condition. In a well-running cluster this should
+         never happen, but there are too many components in a cluster
+         that might have hiccups. Choosing the right balance for this
+         value is very tricky, because on a busy cluster long service
+         times to transfer something across the cluster might be
+         valid. Setting it too short will degrade the service your
+         cluster presents, setting it too long might make the cluster
+         itself not recover from something severely broken for too
+         long.
+       </para>
+       <para>
+         Be aware that if you set this parameter, this needs to be in
+         the file smb.conf, it is not really helpful to put this into
+         a registry configuration (typical on a cluster), because to
+         access the registry, contact with ctdb is required.
+       </para>
+       <para>Setting <parameter>ctdb timeout</parameter> to n makes
+         any process waiting longer than n seconds for a reply by the
+         cluster panic. Setting it to 0 (the default) makes Samba
+         block forever, which is the highly recommended default.
+       </para>
+</description>
+<value type="default">0</value>
+</samba:parameter>
diff --git a/source3/include/packet.h b/source3/include/packet.h

index 03331da7503d64abb571df9c20f8f0c6b0347258..45a9bc2ef19dec3cfb94b3b781f3661328047114 100644 (file)
--- a/source3/include/packet.h
+++ b/source3/include/packet.h
@@ -38,7 +38,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx);
  /*
   * Sync read, wait for the next chunk
   */
-NTSTATUS packet_fd_read_sync(struct packet_context *ctx);
+NTSTATUS packet_fd_read_sync(struct packet_context *ctx,
+                            struct timeval *timeout);
  
  /*
   * Handle an incoming packet:
diff --git a/source3/include/proto.h b/source3/include/proto.h

index d9b8f072a190a35b7f13040285c03de57ae4ae53..2884cc97a32cb81a5dc64d8f8ebb2053669db8c3 100644 (file)
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -4124,6 +4124,7 @@ int lp_cups_connection_timeout(void);
  const char *lp_ctdbd_socket(void);
  const char **lp_cluster_addresses(void);
  bool lp_clustering(void);
+int lp_ctdb_timeout(void);
  char *lp_printcommand(int );
  char *lp_lpqcommand(int );
  char *lp_lprmcommand(int );
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c

index 540e62d64399b1f4ffaf9e046f24dcb350ce10ea..291fa3c8e4e972b5846f2df4e1ce96a0b4e90f66 100644 (file)
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -275,6 +275,17 @@ static struct messaging_rec *ctdb_pull_messaging_rec(TALLOC_CTX *mem_ctx,
         return result;
  }
  
+/* Sync read from ctdbd, bounded by "ctdb timeout"; <= 0 means block forever */
+static NTSTATUS ctdb_packet_fd_read_sync(struct packet_context *ctx)
+{
+       struct timeval timeout = timeval_set(lp_ctdb_timeout(), 0);
+       /* Only a positive value is a usable select() timeout; a negative */
+       /* tv_sec is an invalid interval, so treat it like 0 (no timeout). */
+       struct timeval *ptimeout = (timeout.tv_sec > 0) ? &timeout : NULL;
+
+       return packet_fd_read_sync(ctx, ptimeout);
+}
+
  /*
   * Read a full ctdbd request. If we have a messaging context, defer incoming
   * messages that might come in between.
@@ -289,7 +300,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
  
   again:
  
-       status = packet_fd_read_sync(conn->pkt);
+       status = ctdb_packet_fd_read_sync(conn->pkt);
  
         if (NT_STATUS_EQUAL(status, NT_STATUS_NETWORK_BUSY)) {
                 /* EAGAIN */
@@ -1157,7 +1168,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
                         break;
                 }
  
-               status = packet_fd_read_sync(conn->pkt);
+               status = ctdb_packet_fd_read_sync(conn->pkt);
  
                 if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) {
                         /*
diff --git a/source3/lib/packet.c b/source3/lib/packet.c

index ef28bf9f625b53b0f8ef214eaf1e0ac3fe9262d4..c131b973bc3f525e5dbfda71277360b59bbc48b6 100644 (file)
--- a/source3/lib/packet.c
+++ b/source3/lib/packet.c
@@ -101,7 +101,8 @@ NTSTATUS packet_fd_read(struct packet_context *ctx)
         return NT_STATUS_OK;
  }
  
-NTSTATUS packet_fd_read_sync(struct packet_context *ctx)
+NTSTATUS packet_fd_read_sync(struct packet_context *ctx,
+                            struct timeval *timeout)
  {
         int res;
         fd_set r_fds;
@@ -109,7 +110,12 @@ NTSTATUS packet_fd_read_sync(struct packet_context *ctx)
         FD_ZERO(&r_fds);
         FD_SET(ctx->fd, &r_fds);
  
-       res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, NULL);
+       res = sys_select(ctx->fd+1, &r_fds, NULL, NULL, timeout);
+
+       if (res == 0) {
+               DEBUG(10, ("select timed out\n"));
+               return NT_STATUS_IO_TIMEOUT;
+       }
  
         if (res == -1) {
                 DEBUG(10, ("select returned %s\n", strerror(errno)));
diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c

index ad5b86fef81375202c5011b3bc0e5dc1a1b2e70b..110916f89941a1d7df575340769bd6381142d96a 100644 (file)
--- a/source3/param/loadparm.c
+++ b/source3/param/loadparm.c
@@ -266,6 +266,7 @@ struct global {
         char *ctdbdSocket;
         char **szClusterAddresses;
         bool clustering;
+       int ctdb_timeout;
         int ldap_passwd_sync;
         int ldap_replication_sleep;
         int ldap_timeout; /* This is initialised in init_globals */
@@ -2531,6 +2532,15 @@ static struct parm_struct parm_table[] = {
                 .enum_list      = NULL,
                 .flags          = FLAG_ADVANCED | FLAG_GLOBAL,
         },
+       {
+               .label          = "ctdb timeout",
+               .type           = P_INTEGER,
+               .p_class        = P_GLOBAL,
+               .ptr            = &Globals.ctdb_timeout,
+               .special        = NULL,
+               .enum_list      = NULL,
+               .flags          = FLAG_ADVANCED | FLAG_GLOBAL,
+       },
  
         {N_("Printing Options"), P_SEP, P_SEPARATOR},
  
@@ -5084,6 +5094,7 @@ static void init_globals(bool first_time_only)
         string_set(&Globals.ctdbdSocket, "");
         Globals.szClusterAddresses = NULL;
         Globals.clustering = False;
+       Globals.ctdb_timeout = 0;
  
         Globals.winbind_cache_time = 300;       /* 5 minutes */
         Globals.winbind_reconnect_delay = 30;   /* 30 seconds */
@@ -5513,6 +5524,7 @@ FN_GLOBAL_INTEGER(lp_cups_connection_timeout, &Globals.cups_connection_timeout)
  FN_GLOBAL_CONST_STRING(lp_ctdbd_socket, &Globals.ctdbdSocket)
  FN_GLOBAL_LIST(lp_cluster_addresses, &Globals.szClusterAddresses)
  FN_GLOBAL_BOOL(lp_clustering, &Globals.clustering)
+FN_GLOBAL_INTEGER(lp_ctdb_timeout, &Globals.ctdb_timeout)
  FN_LOCAL_STRING(lp_printcommand, szPrintcommand)
  FN_LOCAL_STRING(lp_lpqcommand, szLpqcommand)
  FN_LOCAL_STRING(lp_lprmcommand, szLprmcommand)
author	Volker Lendecke <vl@samba.org>
	Tue, 3 Nov 2009 04:41:02 +0000 (05:41 +0100)
committer	Michael Adam <obnox@samba.org>
	Wed, 10 Mar 2010 12:22:13 +0000 (13:22 +0100)
docs-xml/smbdotconf/misc/ctdbtimeout.xml	[new file with mode: 0644]	patch \| blob
source3/include/packet.h		patch \| blob \| history
source3/include/proto.h		patch \| blob \| history
source3/lib/ctdbd_conn.c		patch \| blob \| history
source3/lib/packet.c		patch \| blob \| history
source3/param/loadparm.c		patch \| blob \| history