s3:ctdb: pass the ctdb control flags to the ctdb daemon when sending the control

[metze/samba/wip.git] / source3 / lib / ctdbd_conn.c
diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c

index 75a513312e21f27d93f35f7d6af4f894f6f87f65..45e992ce8b3a81cf9f95ff118f11a6590f46deb8 100644 (file)
--- a/source3/lib/ctdbd_conn.c
+++ b/source3/lib/ctdbd_conn.c
@@ -57,7 +57,7 @@ static void cluster_fatal(const char *why)
            a core file. We need to release this process id immediately
            so that someone else can take over without getting sharing
            violations */
-       _exit(0);
+       _exit(1);
  }
  
  /*
@@ -104,6 +104,59 @@ static NTSTATUS get_cluster_vnn(struct ctdbd_connection *conn, uint32 *vnn)
         return status;
  }
  
+/*
+ * Are we active? True only if the nodemap lists vnn with none of the
+ * BANNED, DISCONNECTED, PERMANENTLY_DISABLED or STOPPED flags set.
+ */
+static bool ctdbd_working(struct ctdbd_connection *conn, uint32_t vnn)
+{
+       int32_t cstatus=-1;
+       NTSTATUS status;
+       TDB_DATA outdata = tdb_null;
+       struct ctdb_node_map *m;
+       uint32_t failure_flags;
+       bool ret = false;
+       int i;
+
+       status = ctdbd_control(conn, CTDB_CURRENT_NODE,
+                              CTDB_CONTROL_GET_NODEMAP, 0, 0,
+                              tdb_null, talloc_tos(), &outdata, &cstatus);
+       if (!NT_STATUS_IS_OK(status)) {
+               cluster_fatal("ctdbd_control failed\n"); /* does not return */
+       }
+       if ((cstatus != 0) || (outdata.dptr == NULL)) {
+               DEBUG(2, ("Received invalid ctdb data\n"));
+               goto fail; /* free any reply buffer, like the other exits */
+       }
+
+       m = (struct ctdb_node_map *)outdata.dptr;
+       for (i=0; i<m->num; i++) {
+               if (vnn == m->nodes[i].pnn) {
+                       break;
+               }
+       }
+
+       if (i == m->num) {
+               DEBUG(2, ("Did not find ourselves (node %d) in nodemap\n",
+                         (int)vnn));
+               goto fail;
+       }
+
+       failure_flags = NODE_FLAGS_BANNED | NODE_FLAGS_DISCONNECTED
+               | NODE_FLAGS_PERMANENTLY_DISABLED | NODE_FLAGS_STOPPED;
+
+       if ((m->nodes[i].flags & failure_flags) != 0) {
+               DEBUG(2, ("Node has status %x, not active\n",
+                         (int)m->nodes[i].flags));
+               goto fail;
+       }
+
+       ret = true;
+fail:
+       TALLOC_FREE(outdata.dptr);
+       return ret;
+}
+
  uint32 ctdbd_vnn(const struct ctdbd_connection *conn)
  {
         return conn->our_vnn;
@@ -200,7 +253,7 @@ struct deferred_msg_state {
  
  static void deferred_message_dispatch(struct event_context *event_ctx,
                                       struct timed_event *te,
-                                     const struct timeval *now,
+                                     struct timeval now,
                                       void *private_data)
  {
         struct deferred_msg_state *state = talloc_get_type_abort(
@@ -275,6 +328,17 @@ static struct messaging_rec *ctdb_pull_messaging_rec(TALLOC_CTX *mem_ctx,
         return result;
  }
  
+static NTSTATUS ctdb_packet_fd_read_sync(struct packet_context *ctx)
+{
+       /* timeout from lp_ctdb_timeout(); tv_sec == 0 means pass NULL */
+       struct timeval tv = timeval_set(lp_ctdb_timeout(), 0);
+
+       if (tv.tv_sec == 0) {
+               return packet_fd_read_sync(ctx, NULL);
+       }
+       return packet_fd_read_sync(ctx, &tv);
+}
+
  /*
   * Read a full ctdbd request. If we have a messaging context, defer incoming
   * messages that might come in between.
@@ -289,7 +353,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
  
   again:
  
-       status = packet_fd_read_sync(conn->pkt);
+       status = ctdb_packet_fd_read_sync(conn->pkt);
  
         if (NT_STATUS_EQUAL(status, NT_STATUS_NETWORK_BUSY)) {
                 /* EAGAIN */
@@ -350,10 +414,18 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
                         goto next_pkt;
                 }
  
-               if (msg->srvid == CTDB_SRVID_RECONFIGURE) {
-                       DEBUG(0,("Got cluster reconfigure message in ctdb_read_req\n"));
+               if ((msg->srvid == CTDB_SRVID_RECONFIGURE)
+                   || (msg->srvid == CTDB_SRVID_SAMBA_NOTIFY)) {
+
+                       DEBUG(1, ("ctdb_read_req: Got %s message\n",
+                                 (msg->srvid == CTDB_SRVID_RECONFIGURE)
+                                 ? "cluster reconfigure" : "SAMBA_NOTIFY"));
+
                         messaging_send(conn->msg_ctx, procid_self(),
                                        MSG_SMB_BRL_VALIDATE, &data_blob_null);
+                       messaging_send(conn->msg_ctx, procid_self(),
+                                      MSG_DBWRAP_G_LOCK_RETRY,
+                                      &data_blob_null);
                         TALLOC_FREE(hdr);
                         goto next_pkt;
                 }
@@ -383,7 +455,6 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
                 evt = event_add_timed(conn->msg_ctx->event_ctx,
                                       conn->msg_ctx->event_ctx,
                                       timeval_zero(),
-                                     "deferred_message_dispatch",
                                       deferred_message_dispatch,
                                       msg_state);
                 if (evt == NULL) {
@@ -438,6 +509,12 @@ NTSTATUS ctdbd_init_connection(TALLOC_CTX *mem_ctx,
                 goto fail;
         }
  
+       if (!ctdbd_working(conn, conn->our_vnn)) {
+               DEBUG(2, ("Node is not working, can not connect\n"));
+               status = NT_STATUS_INTERNAL_DB_ERROR;
+               goto fail;
+       }
+
         generate_random_buffer((unsigned char *)&conn->rand_srvid,
                                sizeof(conn->rand_srvid));
  
@@ -483,6 +560,11 @@ NTSTATUS ctdbd_messaging_connection(TALLOC_CTX *mem_ctx,
                 goto fail;
         }
  
+       status = register_with_ctdbd(conn, CTDB_SRVID_SAMBA_NOTIFY);
+       if (!NT_STATUS_IS_OK(status)) {
+               goto fail;
+       }
+
         *pconn = conn;
         return NT_STATUS_OK;
  
@@ -491,6 +573,16 @@ NTSTATUS ctdbd_messaging_connection(TALLOC_CTX *mem_ctx,
         return status;
  }
  
+struct messaging_context *ctdb_conn_msg_ctx(struct ctdbd_connection *conn)
+{
+       return conn->msg_ctx; /* no NULL check: conn must be valid */
+}
+
+int ctdbd_conn_get_fd(struct ctdbd_connection *conn)
+{
+       return packet_get_fd(conn->pkt); /* no NULL check: conn must be valid */
+}
+
  /*
   * Packet handler to receive and handle a ctdb message
   */
@@ -523,26 +615,24 @@ static NTSTATUS ctdb_handle_message(uint8_t *buf, size_t length,
  
         SMB_ASSERT(conn->msg_ctx != NULL);
  
-       if (msg->srvid == CTDB_SRVID_RECONFIGURE) {
+       if ((msg->srvid == CTDB_SRVID_RECONFIGURE)
+           || (msg->srvid == CTDB_SRVID_SAMBA_NOTIFY)){
                 DEBUG(0,("Got cluster reconfigure message\n"));
                 /*
-                * when the cluster is reconfigured, we need to clean the brl
-                * database
+                * when the cluster is reconfigured or someone of the
+                * family has passed away (SAMBA_NOTIFY), we need to
+                * clean the brl database
                  */
                 messaging_send(conn->msg_ctx, procid_self(),
                                MSG_SMB_BRL_VALIDATE, &data_blob_null);
  
-               /*
-                * it's possible that we have just rejoined the cluster after
-                * an outage. In that case our pending locks could have been
-                * removed from the lockdb, so retry them once more
-                */
-               message_send_all(conn->msg_ctx, MSG_SMB_UNLOCK, NULL, 0, NULL);
+               messaging_send(conn->msg_ctx, procid_self(),
+                              MSG_DBWRAP_G_LOCK_RETRY,
+                              &data_blob_null);
  
                 TALLOC_FREE(buf);
  
                 return NT_STATUS_OK;
-               
         }
  
         /* only messages to our pid or the broadcast are valid here */
@@ -702,9 +792,6 @@ static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
         struct ctdbd_connection *new_conn = NULL;
         NTSTATUS status;
  
-       /* the samba3 ctdb code can't handle NOREPLY yet */
-       flags &= ~CTDB_CTRL_FLAG_NOREPLY;
-
         if (conn == NULL) {
                 status = ctdbd_init_connection(NULL, &new_conn);
  
@@ -727,6 +814,7 @@ static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
         req.opcode           = opcode;
         req.srvid            = srvid;
         req.datalen          = data.dsize;
+       req.flags            = flags;
  
         DEBUG(10, ("ctdbd_control: Sending ctdb packet\n"));
         ctdb_packet_dump(&req.hdr);
@@ -750,6 +838,9 @@ static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
  
         if (flags & CTDB_CTRL_FLAG_NOREPLY) {
                 TALLOC_FREE(new_conn);
+               if (cstatus) {
+                       *cstatus = 0;
+               }
                 return NT_STATUS_OK;
         }
  
@@ -1102,6 +1193,11 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
         struct ctdbd_traverse_state state;
  
         status = ctdbd_init_connection(NULL, &conn);
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(0, ("ctdbd_init_connection failed: %s\n",
+                         nt_errstr(status)));
+               return status;
+       }
  
         t.db_id = db_id;
         t.srvid = conn->rand_srvid;
@@ -1153,7 +1249,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
                         break;
                 }
  
-               status = packet_fd_read_sync(conn->pkt);
+               status = ctdb_packet_fd_read_sync(conn->pkt);
  
                 if (NT_STATUS_EQUAL(status, NT_STATUS_RETRY)) {
                         /*
@@ -1164,6 +1260,7 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
  
                 if (NT_STATUS_EQUAL(status, NT_STATUS_END_OF_FILE)) {
                         status = NT_STATUS_OK;
+                       break;
                 }
  
                 if (!NT_STATUS_IS_OK(status)) {
@@ -1177,35 +1274,78 @@ NTSTATUS ctdbd_traverse(uint32 db_id,
         return status;
  }
  
+/*
+ * Copy *in to *out, rewriting an IPv4-mapped IPv6 address (::ffff:a.b.c.d)
+ * as plain AF_INET with the same port. in and out must not alias.
+ */
+static void smbd_ctdb_canonicalize_ip(const struct sockaddr_storage *in,
+                                     struct sockaddr_storage *out)
+{
+       memcpy(out, in, sizeof (*out));
+#ifdef HAVE_IPV6
+       if (in->ss_family == AF_INET6) {
+               const uint8_t prefix[12] = { 0,0,0,0,0,0,0,0,0,0,0xff,0xff };
+               const struct sockaddr_in6 *in6 = (const struct sockaddr_in6 *)in;
+               struct sockaddr_in *out4 = (struct sockaddr_in *)out;
+               if (memcmp(&in6->sin6_addr, prefix, sizeof(prefix)) == 0) {
+                       memset(out, 0, sizeof(*out));
+#ifdef HAVE_SOCK_SIN_LEN
+                       out4->sin_len = sizeof(*out4); /* not the storage size */
+#endif
+                       out4->sin_family = AF_INET;
+                       out4->sin_port   = in6->sin6_port;
+                       memcpy(&out4->sin_addr, &in6->sin6_addr.s6_addr[12], 4);
+               }
+       }
+#endif
+}
+
  /*
   * Register us as a server for a particular tcp connection
   */
  
  NTSTATUS ctdbd_register_ips(struct ctdbd_connection *conn,
-                           const struct sockaddr *server,
-                           const struct sockaddr *client,
+                           const struct sockaddr_storage *_server,
+                           const struct sockaddr_storage *_client,
                             void (*release_ip_handler)(const char *ip_addr,
                                                        void *private_data),
                             void *private_data)
  {
-       struct ctdb_control_tcp_vnn p;
+       /*
+        * we still use ctdb_control_tcp for ipv4
+        * because we want to work against older ctdb
+        * versions at runtime
+        */
+       struct ctdb_control_tcp p4;
+#ifdef HAVE_STRUCT_CTDB_CONTROL_TCP_ADDR
+       struct ctdb_control_tcp_addr p;
+#endif
         TDB_DATA data;
         NTSTATUS status;
+       struct sockaddr_storage client;
+       struct sockaddr_storage server;
  
         /*
          * Only one connection so far
          */
         SMB_ASSERT(conn->release_ip_handler == NULL);
  
-       switch (client->sa_family) {
+       smbd_ctdb_canonicalize_ip(_client, &client);
+       smbd_ctdb_canonicalize_ip(_server, &server);
+
+       switch (client.ss_family) {
         case AF_INET:
-               p.dest.ip = *(struct sockaddr_in *)server;
-               p.src.ip = *(struct sockaddr_in *)client;
+               p4.dest = *(struct sockaddr_in *)&server;
+               p4.src = *(struct sockaddr_in *)&client;
+               data.dptr = (uint8_t *)&p4;
+               data.dsize = sizeof(p4);
                 break;
-#ifdef HAVE_IPV6
+#ifdef HAVE_STRUCT_CTDB_CONTROL_TCP_ADDR
         case AF_INET6:
-               p.dest.ip6 = *(struct sockaddr_in6 *)server;
-               p.src.ip6 = *(struct sockaddr_in6 *)client;
+               p.dest.ip6 = *(struct sockaddr_in6 *)&server;
+               p.src.ip6 = *(struct sockaddr_in6 *)&client;
+               data.dptr = (uint8_t *)&p;
+               data.dsize = sizeof(p);
                 break;
  #endif
         default:
@@ -1228,11 +1368,8 @@ NTSTATUS ctdbd_register_ips(struct ctdbd_connection *conn,
          * can send an extra ack to trigger a reset for our client, so it
          * immediately reconnects
          */
-       data.dptr = (uint8_t *)&p;
-       data.dsize = sizeof(p);
-
         return ctdbd_control(conn, CTDB_CURRENT_NODE, 
-                            CTDB_CONTROL_TCP_ADD, 0,
+                            CTDB_CONTROL_TCP_CLIENT, 0,
                              CTDB_CTRL_FLAG_NOREPLY, data, NULL, NULL, NULL);
  }
  
@@ -1255,6 +1392,50 @@ NTSTATUS ctdbd_control_local(struct ctdbd_connection *conn, uint32 opcode,
         return ctdbd_control(conn, CTDB_CURRENT_NODE, opcode, srvid, flags, data, mem_ctx, outdata, cstatus);
  }
  
+NTSTATUS ctdb_watch_us(struct ctdbd_connection *conn)
+{
+       struct ctdb_client_notify_register reg;
+       TDB_DATA payload;
+       NTSTATUS status;
+       int cstatus;
+
+       /* CTDB_SRVID_SAMBA_NOTIFY with a single zero byte of notify data */
+       reg.srvid = CTDB_SRVID_SAMBA_NOTIFY;
+       reg.len = 1;
+       reg.notify_data[0] = 0;
+       /* pass the fixed header plus reg.len bytes of notify_data */
+       payload = make_tdb_data((uint8_t *)&reg,
+                               offsetof(struct ctdb_client_notify_register,
+                                        notify_data) + reg.len);
+       status = ctdbd_control_local(conn, CTDB_CONTROL_REGISTER_NOTIFY,
+                                    conn->rand_srvid, 0, payload,
+                                    NULL, NULL, &cstatus);
+       if (NT_STATUS_IS_OK(status)) {
+               return status;
+       }
+       DEBUG(1, ("ctdbd_control_local failed: %s\n", nt_errstr(status)));
+       return status;
+}
+
+NTSTATUS ctdb_unwatch(struct ctdbd_connection *conn)
+{
+       struct ctdb_client_notify_deregister dereg;
+       TDB_DATA payload;
+       NTSTATUS status;
+       int cstatus;
+
+       dereg.srvid = CTDB_SRVID_SAMBA_NOTIFY;
+       payload = make_tdb_data((uint8_t *)&dereg, sizeof(dereg));
+       status = ctdbd_control_local(conn, CTDB_CONTROL_DEREGISTER_NOTIFY,
+                                    conn->rand_srvid, 0, payload,
+                                    NULL, NULL, &cstatus);
+       if (NT_STATUS_IS_OK(status)) {
+               return status;
+       }
+       DEBUG(1, ("ctdbd_control_local failed: %s\n", nt_errstr(status)));
+       return status;
+}
+
  #else
  
  NTSTATUS ctdbd_init_connection(TALLOC_CTX *mem_ctx,