smbd/winbindd: Do an early check if ctdbd is functional
[mat/samba.git] / source3 / lib / ctdbd_conn.c
index da58259e6b3e6754ca520c5ca49f0ea38e6c4c47..6ab4bbe70466e5e875fa18d8f27698fe51ec59a6 100644 (file)
 
 struct ctdbd_connection {
        struct messaging_context *msg_ctx;
-       uint32 reqid;
-       uint32 our_vnn;
-       uint64 rand_srvid;
+       uint32_t reqid;
+       uint32_t our_vnn;
+       uint64_t rand_srvid;
        struct ctdb_packet_context *pkt;
-       struct fd_event *fde;
+       struct tevent_fd *fde;
 
        void (*release_ip_handler)(const char *ip_addr, void *private_data);
        void *release_ip_priv;
@@ -73,8 +73,8 @@ static uint32_t ctdbd_next_reqid(struct ctdbd_connection *conn)
 }
 
 static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
-                             uint32_t vnn, uint32 opcode, 
-                             uint64_t srvid, uint32_t flags, TDB_DATA data, 
+                             uint32_t vnn, uint32_t opcode,
+                             uint64_t srvid, uint32_t flags, TDB_DATA data,
                              TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
                              int *cstatus);
 
@@ -120,7 +120,7 @@ NTSTATUS register_with_ctdbd(struct ctdbd_connection *conn, uint64_t srvid)
 /*
  * get our vnn from the cluster
  */
-static NTSTATUS get_cluster_vnn(struct ctdbd_connection *conn, uint32 *vnn)
+static NTSTATUS get_cluster_vnn(struct ctdbd_connection *conn, uint32_t *vnn)
 {
        int32_t cstatus=-1;
        NTSTATUS status;
@@ -128,7 +128,8 @@ static NTSTATUS get_cluster_vnn(struct ctdbd_connection *conn, uint32 *vnn)
                               CTDB_CURRENT_NODE, CTDB_CONTROL_GET_PNN, 0, 0,
                               tdb_null, NULL, NULL, &cstatus);
        if (!NT_STATUS_IS_OK(status)) {
-               cluster_fatal("ctdbd_control failed\n");
+               DEBUG(1, ("ctdbd_control failed: %s\n", nt_errstr(status)));
+               return status;
        }
        *vnn = (uint32_t)cstatus;
        return status;
@@ -151,7 +152,8 @@ static bool ctdbd_working(struct ctdbd_connection *conn, uint32_t vnn)
                               CTDB_CONTROL_GET_NODEMAP, 0, 0,
                               tdb_null, talloc_tos(), &outdata, &cstatus);
        if (!NT_STATUS_IS_OK(status)) {
-               cluster_fatal("ctdbd_control failed\n");
+               DEBUG(1, ("ctdbd_control failed: %s\n", nt_errstr(status)));
+               return false;
        }
        if ((cstatus != 0) || (outdata.dptr == NULL)) {
                DEBUG(2, ("Received invalid ctdb data\n"));
@@ -187,7 +189,7 @@ fail:
        return ret;
 }
 
-uint32 ctdbd_vnn(const struct ctdbd_connection *conn)
+uint32_t ctdbd_vnn(const struct ctdbd_connection *conn)
 {
        return conn->our_vnn; /* plain accessor: hands back the our_vnn field of conn */
 }
@@ -201,7 +203,7 @@ static NTSTATUS ctdbd_connect(TALLOC_CTX *mem_ctx,
 {
        struct ctdb_packet_context *result;
        const char *sockname = lp_ctdbd_socket();
-       struct sockaddr_un addr;
+       struct sockaddr_un addr = { 0, };
        int fd;
        socklen_t salen;
 
@@ -211,9 +213,8 @@ static NTSTATUS ctdbd_connect(TALLOC_CTX *mem_ctx,
                return map_nt_error_from_unix(errno);
        }
 
-       ZERO_STRUCT(addr);
        addr.sun_family = AF_UNIX;
-       strncpy(addr.sun_path, sockname, sizeof(addr.sun_path));
+       snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", sockname);
 
        salen = sizeof(struct sockaddr_un);
        if (connect(fd, (struct sockaddr *)(void *)&addr, salen) == -1) {
@@ -240,13 +241,13 @@ static bool ctdb_req_complete(const uint8_t *buf, size_t available,
                              size_t *length,
                              void *private_data)
 {
-       uint32 msglen;
+       uint32_t msglen;
 
        if (available < sizeof(msglen)) {
                return False;
        }
 
-       msglen = *((const uint32 *)buf);
+       msglen = *((const uint32_t *)buf);
 
        DEBUG(11, ("msglen = %d\n", msglen));
 
@@ -279,8 +280,8 @@ struct deferred_msg_state {
  * Timed event handler for the deferred message
  */
 
-static void deferred_message_dispatch(struct event_context *event_ctx,
-                                     struct timed_event *te,
+static void deferred_message_dispatch(struct tevent_context *event_ctx,
+                                     struct tevent_timer *te,
                                      struct timeval now,
                                      void *private_data)
 {
@@ -371,7 +372,7 @@ static NTSTATUS ctdb_packet_fd_read_sync(struct ctdb_packet_context *ctx)
  * messages that might come in between.
  */
 
-static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
+static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32_t reqid,
                              TALLOC_CTX *mem_ctx, void *result)
 {
        struct ctdb_req_header *hdr;
@@ -414,7 +415,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
        ctdb_packet_dump(hdr);
 
        if (hdr->operation == CTDB_REQ_MESSAGE) {
-               struct timed_event *evt;
+               struct tevent_timer *evt;
                struct deferred_msg_state *msg_state;
                struct ctdb_req_message *msg = (struct ctdb_req_message *)hdr;
 
@@ -476,7 +477,7 @@ static NTSTATUS ctdb_read_req(struct ctdbd_connection *conn, uint32 reqid,
                 * We're waiting for a call reply, but an async message has
                 * crossed. Defer dispatching to the toplevel event loop.
                 */
-               evt = event_add_timed(conn->msg_ctx->event_ctx,
+               evt = tevent_add_timer(conn->msg_ctx->event_ctx,
                                      conn->msg_ctx->event_ctx,
                                      timeval_zero(),
                                      deferred_message_dispatch,
@@ -685,8 +686,8 @@ static NTSTATUS ctdb_handle_message(uint8_t *buf, size_t length,
  * The ctdbd socket is readable asynchronously
  */
 
-static void ctdbd_socket_handler(struct event_context *event_ctx,
-                                struct fd_event *event,
+static void ctdbd_socket_handler(struct tevent_context *event_ctx,
+                                struct tevent_fd *event,
                                 uint16 flags,
                                 void *private_data)
 {
@@ -721,9 +722,9 @@ NTSTATUS ctdbd_register_msg_ctx(struct ctdbd_connection *conn,
        SMB_ASSERT(conn->msg_ctx == NULL);
        SMB_ASSERT(conn->fde == NULL);
 
-       if (!(conn->fde = event_add_fd(msg_ctx->event_ctx, conn,
+       if (!(conn->fde = tevent_add_fd(msg_ctx->event_ctx, conn,
                                       ctdb_packet_get_fd(conn->pkt),
-                                      EVENT_FD_READ,
+                                      TEVENT_FD_READ,
                                       ctdbd_socket_handler,
                                       conn))) {
                DEBUG(0, ("event_add_fd failed\n"));
@@ -740,7 +741,7 @@ NTSTATUS ctdbd_register_msg_ctx(struct ctdbd_connection *conn,
  */
 
 NTSTATUS ctdbd_messaging_send(struct ctdbd_connection *conn,
-                             uint32 dst_vnn, uint64 dst_srvid,
+                             uint32_t dst_vnn, uint64_t dst_srvid,
                              struct messaging_rec *msg)
 {
        DATA_BLOB blob;
@@ -760,11 +761,11 @@ NTSTATUS ctdbd_messaging_send(struct ctdbd_connection *conn,
        status = ctdbd_messaging_send_blob(conn, dst_vnn, dst_srvid,
                                           blob.data, blob.length);
        TALLOC_FREE(blob.data);
-       return NT_STATUS_OK;
+       return status;
 }
 
 NTSTATUS ctdbd_messaging_send_blob(struct ctdbd_connection *conn,
-                                  uint32 dst_vnn, uint64 dst_srvid,
+                                  uint32_t dst_vnn, uint64_t dst_srvid,
                                   const uint8_t *buf, size_t buflen)
 {
        struct ctdb_req_message r;
@@ -806,9 +807,9 @@ NTSTATUS ctdbd_messaging_send_blob(struct ctdbd_connection *conn,
  * send/recv a generic ctdb control message
  */
 static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
-                             uint32_t vnn, uint32 opcode, 
-                             uint64_t srvid, uint32_t flags, 
-                             TDB_DATA data, 
+                             uint32_t vnn, uint32_t opcode,
+                             uint64_t srvid, uint32_t flags,
+                             TDB_DATA data,
                              TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
                              int *cstatus)
 {
@@ -904,7 +905,7 @@ static NTSTATUS ctdbd_control(struct ctdbd_connection *conn,
 /*
  * see if a remote process exists
  */
-bool ctdbd_process_exists(struct ctdbd_connection *conn, uint32 vnn, pid_t pid)
+bool ctdbd_process_exists(struct ctdbd_connection *conn, uint32_t vnn, pid_t pid)
 {
        struct server_id id;
        bool result;
@@ -1055,6 +1056,7 @@ static bool ctdb_collect_vnns(TALLOC_CTX *mem_ctx,
 
        vnn_indexes = talloc_array(mem_ctx, unsigned, num_pids);
        if (vnn_indexes == NULL) {
+               DEBUG(1, ("talloc_array failed\n"));
                goto fail;
        }
 
@@ -1079,6 +1081,7 @@ static bool ctdb_collect_vnns(TALLOC_CTX *mem_ctx,
                vnns = talloc_realloc(mem_ctx, vnns, struct ctdb_vnn_list,
                                      num_vnns+1);
                if (vnns == NULL) {
+                       DEBUG(1, ("talloc_realloc failed\n"));
                        goto fail;
                }
                vnns[num_vnns].vnn = vnn;
@@ -1091,11 +1094,13 @@ static bool ctdb_collect_vnns(TALLOC_CTX *mem_ctx,
 
                vnn->srvids = talloc_array(vnns, uint64_t, vnn->num_srvids);
                if (vnn->srvids == NULL) {
+                       DEBUG(1, ("talloc_array failed\n"));
                        goto fail;
                }
                vnn->pid_indexes = talloc_array(vnns, unsigned,
                                                vnn->num_srvids);
                if (vnn->pid_indexes == NULL) {
+                       DEBUG(1, ("talloc_array failed\n"));
                        goto fail;
                }
        }
@@ -1130,6 +1135,7 @@ bool ctdb_serverids_exist(struct ctdbd_connection *conn,
 
        if (!ctdb_collect_vnns(talloc_tos(), pids, num_pids,
                               &vnns, &num_vnns)) {
+               DEBUG(1, ("ctdb_collect_vnns failed\n"));
                goto fail;
        }
 
@@ -1166,16 +1172,16 @@ bool ctdb_serverids_exist(struct ctdbd_connection *conn,
                                               data)),
                        data_blob_const(vnn->srvids, req.datalen));
                if (!NT_STATUS_IS_OK(status)) {
-                       DEBUG(10, ("ctdb_packet_send failed: %s\n",
-                                  nt_errstr(status)));
+                       DEBUG(1, ("ctdb_packet_send failed: %s\n",
+                                 nt_errstr(status)));
                        goto fail;
                }
        }
 
        status = ctdb_packet_flush(conn->pkt);
        if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(10, ("ctdb_packet_flush failed: %s\n",
-                          nt_errstr(status)));
+               DEBUG(1, ("ctdb_packet_flush failed: %s\n",
+                         nt_errstr(status)));
                goto fail;
        }
 
@@ -1185,16 +1191,18 @@ bool ctdb_serverids_exist(struct ctdbd_connection *conn,
                struct ctdb_reply_control *reply = NULL;
                struct ctdb_vnn_list *vnn;
                uint32_t reqid;
+               uint8_t *reply_data;
 
                status = ctdb_read_req(conn, 0, talloc_tos(), (void *)&reply);
                if (!NT_STATUS_IS_OK(status)) {
-                       DEBUG(10, ("ctdb_read_req failed: %s\n",
-                                  nt_errstr(status)));
+                       DEBUG(1, ("ctdb_read_req failed: %s\n",
+                                 nt_errstr(status)));
                        goto fail;
                }
 
                if (reply->hdr.operation != CTDB_REPLY_CONTROL) {
-                       DEBUG(10, ("Received invalid reply\n"));
+                       DEBUG(1, ("Received invalid reply %u\n",
+                                 (unsigned)reply->hdr.operation));
                        goto fail;
                }
 
@@ -1208,8 +1216,8 @@ bool ctdb_serverids_exist(struct ctdbd_connection *conn,
                        }
                }
                if (i == num_vnns) {
-                       DEBUG(10, ("Received unknown reqid number %u\n",
-                                  (unsigned)reqid));
+                       DEBUG(1, ("Received unknown reqid number %u\n",
+                                 (unsigned)reqid));
                        goto fail;
                }
 
@@ -1221,9 +1229,26 @@ bool ctdb_serverids_exist(struct ctdbd_connection *conn,
                           (unsigned)vnn->vnn, vnn->num_srvids,
                           (unsigned)reply->datalen));
 
-               if (reply->datalen < ((vnn->num_srvids+7)/8)) {
-                       DEBUG(10, ("Received short reply\n"));
-                       goto fail;
+               if (reply->datalen >= ((vnn->num_srvids+7)/8)) {
+                       /*
+                        * Got a real reply
+                        */
+                       reply_data = reply->data;
+               } else {
+                       /*
+                        * Got an error reply
+                        */
+                       DEBUG(5, ("Received short reply len %d, status %u, "
+                                 "errorlen %u\n",
+                                 (unsigned)reply->datalen,
+                                 (unsigned)reply->status,
+                                 (unsigned)reply->errorlen));
+                       dump_data(5, reply->data, reply->errorlen);
+
+                       /*
+                        * This will trigger everything set to false
+                        */
+                       reply_data = NULL;
                }
 
                for (i=0; i<vnn->num_srvids; i++) {
@@ -1234,7 +1259,9 @@ bool ctdb_serverids_exist(struct ctdbd_connection *conn,
                                results[idx] = true;
                                continue;
                        }
-                       results[idx] = ((reply->data[i/8] & (1<<(i%8))) != 0);
+                       results[idx] =
+                               (reply_data != NULL) &&
+                               ((reply_data[i/8] & (1<<(i%8))) != 0);
                }
 
                TALLOC_FREE(reply);
@@ -1284,8 +1311,7 @@ NTSTATUS ctdbd_db_attach(struct ctdbd_connection *conn,
        int32_t cstatus;
        bool persistent = (tdb_flags & TDB_CLEAR_IF_FIRST) == 0;
 
-       data.dptr = (uint8_t*)name;
-       data.dsize = strlen(name)+1;
+       data = string_term_tdb_data(name);
 
        status = ctdbd_control(conn, CTDB_CURRENT_NODE,
                               persistent
@@ -1329,7 +1355,7 @@ NTSTATUS ctdbd_db_attach(struct ctdbd_connection *conn,
 /*
  * force the migration of a record to this node
  */
-NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32 db_id,
+NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32_t db_id,
                       TDB_DATA key)
 {
        struct ctdb_req_call req;
@@ -1389,11 +1415,13 @@ NTSTATUS ctdbd_migrate(struct ctdbd_connection *conn, uint32 db_id,
 }
 
 /*
- * remotely fetch a record (read-only)
+ * Fetch a record and parse it
  */
-NTSTATUS ctdbd_fetch(struct ctdbd_connection *conn, uint32 db_id,
-                    TDB_DATA key, TALLOC_CTX *mem_ctx, TDB_DATA *data,
-                    bool local_copy)
+NTSTATUS ctdbd_parse(struct ctdbd_connection *conn, uint32_t db_id,
+                    TDB_DATA key, bool local_copy,
+                    void (*parser)(TDB_DATA key, TDB_DATA data,
+                                   void *private_data),
+                    void *private_data)
 {
        struct ctdb_req_call req;
        struct ctdb_reply_call *reply;
@@ -1448,21 +1476,17 @@ NTSTATUS ctdbd_fetch(struct ctdbd_connection *conn, uint32 db_id,
                goto fail;
        }
 
-       data->dsize = reply->datalen;
-       if (data->dsize == 0) {
-               data->dptr = NULL;
-               goto done;
-       }
-
-       data->dptr = (uint8 *)talloc_memdup(mem_ctx, &reply->data[0],
-                                           reply->datalen);
-       if (data->dptr == NULL) {
-               DEBUG(0, ("talloc failed\n"));
-               status = NT_STATUS_NO_MEMORY;
+       if (reply->datalen == 0) {
+               /*
+                * Treat an empty record as non-existing
+                */
+               status = NT_STATUS_NOT_FOUND;
                goto fail;
        }
 
- done:
+       parser(key, make_tdb_data(&reply->data[0], reply->datalen),
+              private_data);
+
        status = NT_STATUS_OK;
  fail:
        TALLOC_FREE(reply);
@@ -1537,7 +1561,7 @@ static NTSTATUS ctdb_traverse_handler(uint8_t *buf, size_t length,
   everything in-line.
 */
 
-NTSTATUS ctdbd_traverse(uint32 db_id,
+NTSTATUS ctdbd_traverse(uint32_t db_id,
                        void (*fn)(TDB_DATA key, TDB_DATA data,
                                   void *private_data),
                        void *private_data)
@@ -1746,8 +1770,8 @@ NTSTATUS ctdbd_register_reconfigure(struct ctdbd_connection *conn)
 /*
   call a control on the local node
  */
-NTSTATUS ctdbd_control_local(struct ctdbd_connection *conn, uint32 opcode, 
-                            uint64_t srvid, uint32_t flags, TDB_DATA data, 
+NTSTATUS ctdbd_control_local(struct ctdbd_connection *conn, uint32_t opcode,
+                            uint64_t srvid, uint32_t flags, TDB_DATA data,
                             TALLOC_CTX *mem_ctx, TDB_DATA *outdata,
                             int *cstatus)
 {
@@ -1798,10 +1822,34 @@ NTSTATUS ctdb_unwatch(struct ctdbd_connection *conn)
        return status;
 }
 
+NTSTATUS ctdbd_probe(void)
+{
+       /*
+        * Very early check whether ctdbd is around, to avoid an abort and core
+        * later: try a messaging connection and return the resulting NTSTATUS.
+        */
+       struct ctdbd_connection *conn = NULL;
+       NTSTATUS status;
+
+       status = ctdbd_messaging_connection(talloc_tos(), &conn);
+
+       /*
+        * We only care if we can connect, so release conn straight away.
+        */
+       TALLOC_FREE(conn);
+
+       return status;
+}
+
 #else
 
+NTSTATUS ctdbd_probe(void)
+{
+       return NT_STATUS_OK; /* #else variant: nothing is probed, always OK (presumably the build without ctdb support; matching #if not visible here) */
+}
+
 NTSTATUS ctdbd_messaging_send_blob(struct ctdbd_connection *conn,
-                                  uint32 dst_vnn, uint64 dst_srvid,
+                                  uint32_t dst_vnn, uint64_t dst_srvid,
                                   const uint8_t *buf, size_t buflen)
 {
        return NT_STATUS_NOT_IMPLEMENTED;