s3: Fix a winbind race leading to 100% CPU
[samba.git] / source3 / winbindd / winbindd_dual.c
index 0aca790a041a9a2b12657225f7f74a7884a8a27e..2c0633c3eaa2673bb7c3a5c1db02d623a0ba5534 100644 (file)
@@ -37,6 +37,8 @@
 extern bool override_logfile;
 extern struct winbindd_methods cache_methods;
 
+static struct winbindd_child *children = NULL;
+
 /* Read some data from a client connection */
 
 static NTSTATUS child_read_request(struct winbindd_cli_state *state)
@@ -134,7 +136,7 @@ static void wb_child_request_trigger(struct tevent_req *req,
                req, struct wb_child_request_state);
        struct tevent_req *subreq;
 
-       if ((state->child->pid == 0) && (!fork_domain_child(state->child))) {
+       if ((state->child->sock == -1) && (!fork_domain_child(state->child))) {
                tevent_req_error(req, errno);
                return;
        }
@@ -164,6 +166,13 @@ static void wb_child_request_done(struct tevent_req *subreq)
        ret = wb_simple_trans_recv(subreq, state, &state->response, &err);
        TALLOC_FREE(subreq);
        if (ret == -1) {
+               /*
+                * The basic parent/child communication broke, close
+                * our socket
+                */
+               close(state->child->sock);
+               state->child->sock = -1;
+               DLIST_REMOVE(children, state->child);
                tevent_req_error(req, err);
                return;
        }
@@ -179,10 +188,6 @@ int wb_child_request_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
        if (tevent_req_is_unix_error(req, err)) {
                return -1;
        }
-       if (state->response->result != WINBINDD_OK) {
-               *err = EIO; /* EIO doesn't fit, but what would be better? */
-               return -1;
-       }
        *presponse = talloc_move(mem_ctx, &state->response);
        return 0;
 }
@@ -371,86 +376,6 @@ int wb_domain_request_recv(struct tevent_req *req, TALLOC_CTX *mem_ctx,
        return 0;
 }
 
-/*
- * Machinery for async requests sent to children. You set up a
- * winbindd_request, select a child to query, and issue a async_request
- * call. When the request is completed, the callback function you specified is
- * called back with the private pointer you gave to async_request.
- */
-
-struct winbindd_async_request {
-       struct winbindd_async_request *next, *prev;
-       TALLOC_CTX *mem_ctx;
-       struct winbindd_child *child;
-       struct winbindd_response *response;
-       void (*continuation)(void *private_data, bool success);
-       struct timed_event *reply_timeout_event;
-       pid_t child_pid; /* pid of the child we're waiting on. Used to detect
-                           a restart of the child (child->pid != child_pid). */
-       void *private_data;
-};
-
-static bool fork_domain_child(struct winbindd_child *child);
-static void async_request_done(struct tevent_req *req);
-
-void async_request(TALLOC_CTX *mem_ctx, struct winbindd_child *child,
-                  struct winbindd_request *request,
-                  struct winbindd_response *response,
-                  void (*continuation)(void *private_data, bool success),
-                  void *private_data)
-{
-       struct winbindd_async_request *state;
-       struct tevent_req *req;
-
-       DEBUG(10, ("Sending request to child pid %d (domain=%s)\n",
-                  (int)child->pid,
-                  (child->domain != NULL) ? child->domain->name : "''"));
-
-       state = talloc(mem_ctx, struct winbindd_async_request);
-       if (state == NULL) {
-               DEBUG(0, ("talloc failed\n"));
-               continuation(private_data, False);
-               return;
-       }
-
-       state->mem_ctx = mem_ctx;
-       state->child = child;
-       state->reply_timeout_event = NULL;
-       state->response = response;
-       state->continuation = continuation;
-       state->private_data = private_data;
-
-       request->pid = child->pid;
-
-       req = wb_child_request_send(state, winbind_event_context(),
-                                          child, request);
-       if (req == NULL) {
-               DEBUG(0, ("wb_child_request_send failed\n"));
-                continuation(private_data, false);
-               return;
-        }
-       tevent_req_set_callback(req, async_request_done, state);
-}
-
-static void async_request_done(struct tevent_req *req)
-{
-       struct winbindd_async_request *state = tevent_req_callback_data(
-               req, struct winbindd_async_request);
-       struct winbindd_response *response;
-       int ret, err;
-
-       ret = wb_child_request_recv(req, state, &response, &err);
-       TALLOC_FREE(req);
-       if (ret == -1) {
-               DEBUG(2, ("wb_child_request_recv failed: %s\n",
-                         strerror(err)));
-               state->continuation(state->private_data, false);
-               return;
-       }
-       *state->response = *response;
-       state->continuation(state->private_data, true);
-}
-
 struct domain_request_state {
        struct winbindd_domain *domain;
        struct winbindd_request *request;
@@ -504,7 +429,7 @@ static void async_domain_request_done(struct tevent_req *req)
        ret = wb_domain_request_recv(req, state, &response, &err);
        TALLOC_FREE(req);
        if (ret == -1) {
-               DEBUG(5, ("wb_domain_request returned %s\n", strerror(errno)));
+               DEBUG(5, ("wb_domain_request returned %s\n", strerror(err)));
                state->continuation(state->private_data_data, false);
                return;
        }
@@ -516,13 +441,13 @@ static void recvfrom_child(void *private_data_data, bool success)
 {
        struct winbindd_cli_state *state =
                talloc_get_type_abort(private_data_data, struct winbindd_cli_state);
-       enum winbindd_result result = state->response.result;
+       enum winbindd_result result = state->response->result;
 
        /* This is an optimization: The child has written directly to the
         * response buffer. The request itself is still in pending state,
         * state that in the result code. */
 
-       state->response.result = WINBINDD_PENDING;
+       state->response->result = WINBINDD_PENDING;
 
        if ((!success) || (result != WINBINDD_OK)) {
                request_error(state);
@@ -532,18 +457,11 @@ static void recvfrom_child(void *private_data_data, bool success)
        request_ok(state);
 }
 
-void sendto_child(struct winbindd_cli_state *state,
-                 struct winbindd_child *child)
-{
-       async_request(state->mem_ctx, child, state->request,
-                     &state->response, recvfrom_child, state);
-}
-
 void sendto_domain(struct winbindd_cli_state *state,
                   struct winbindd_domain *domain)
 {
        async_domain_request(state->mem_ctx, domain,
-                            state->request, &state->response,
+                            state->request, state->response,
                             recvfrom_child, state);
 }
 
@@ -556,8 +474,8 @@ static void child_process_request(struct winbindd_child *child,
        /* Free response data - we may be interrupted and receive another
           command before being able to send this data off. */
 
-       state->response.result = WINBINDD_ERROR;
-       state->response.length = sizeof(struct winbindd_response);
+       state->response->result = WINBINDD_ERROR;
+       state->response->length = sizeof(struct winbindd_response);
 
        /* as all requests in the child are sync, we can use talloc_tos() */
        state->mem_ctx = talloc_tos();
@@ -568,17 +486,17 @@ static void child_process_request(struct winbindd_child *child,
                if (state->request->cmd == table->struct_cmd) {
                        DEBUG(10,("child_process_request: request fn %s\n",
                                  table->name));
-                       state->response.result = table->struct_fn(domain, state);
+                       state->response->result = table->struct_fn(domain, state);
                        return;
                }
        }
 
        DEBUG(1 ,("child_process_request: unknown request fn number %d\n",
                  (int)state->request->cmd));
-       state->response.result = WINBINDD_ERROR;
+       state->response->result = WINBINDD_ERROR;
 }
 
-void setup_child(struct winbindd_child *child,
+void setup_child(struct winbindd_domain *domain, struct winbindd_child *child,
                 const struct winbindd_child_dispatch_table *table,
                 const char *logprefix,
                 const char *logname)
@@ -593,14 +511,15 @@ void setup_child(struct winbindd_child *child,
                          "logname == NULL");
        }
 
-       child->domain = NULL;
+       child->sock = -1;
+       child->domain = domain;
        child->table = table;
        child->queue = tevent_queue_create(NULL, "winbind_child");
        SMB_ASSERT(child->queue != NULL);
+       child->rpccli = wbint_rpccli_create(NULL, domain, child);
+       SMB_ASSERT(child->rpccli != NULL);
 }
 
-struct winbindd_child *children = NULL;
-
 void winbind_child_died(pid_t pid)
 {
        struct winbindd_child *child;
@@ -619,9 +538,6 @@ void winbind_child_died(pid_t pid)
        /* This will be re-added in fork_domain_child() */
 
        DLIST_REMOVE(children, child);
-
-       close(child->sock);
-       child->sock = -1;
        child->pid = 0;
 }
 
@@ -833,7 +749,7 @@ void winbind_msg_onlinestatus(struct messaging_context *msg_ctx,
        TALLOC_CTX *mem_ctx;
        const char *message;
        struct server_id *sender;
-       
+
        DEBUG(5,("winbind_msg_onlinestatus received.\n"));
 
        if (!data->data) {
@@ -846,7 +762,7 @@ void winbind_msg_onlinestatus(struct messaging_context *msg_ctx,
        if (mem_ctx == NULL) {
                return;
        }
-       
+
        message = collect_onlinestatus(mem_ctx);
        if (message == NULL) {
                talloc_destroy(mem_ctx);
@@ -1022,6 +938,7 @@ static bool calculate_next_machine_pwd_change(const char *domain,
        time_t pass_last_set_time;
        time_t timeout;
        time_t next_change;
+       struct timeval tv;
        char *pw;
 
        pw = secrets_fetch_machine_password(domain,
@@ -1041,11 +958,36 @@ static bool calculate_next_machine_pwd_change(const char *domain,
                return false;
        }
 
+       tv.tv_sec = pass_last_set_time;
+       DEBUG(10, ("password last changed %s\n",
+                  timeval_string(talloc_tos(), &tv, false)));
+       tv.tv_sec += timeout;
+       DEBUGADD(10, ("password valid until %s\n",
+                     timeval_string(talloc_tos(), &tv, false)));
+
        if (time(NULL) < (pass_last_set_time + timeout)) {
                next_change = pass_last_set_time + timeout;
                DEBUG(10,("machine password still valid until: %s\n",
                        http_timestring(talloc_tos(), next_change)));
                *t = timeval_set(next_change, 0);
+
+               if (lp_clustering()) {
+                       uint8_t randbuf;
+                       /*
+                        * When having a cluster, we have several
+                        * winbinds racing for the password change. In
+                        * the machine_password_change_handler()
+                        * function we check if someone else was
+                        * faster when the event triggers. We add a
+                        * random delay of 0 to 255 seconds here, so
+                        * that we don't all try to change the password
+                        * at the exact same moment.
+                        */
+                       generate_random_buffer(&randbuf, sizeof(randbuf));
+                       DEBUG(10, ("adding %d seconds randomness\n",
+                                  (int)randbuf));
+                       t->tv_sec += randbuf;
+               }
                return true;
        }
 
@@ -1074,9 +1016,18 @@ static void machine_password_change_handler(struct event_context *ctx,
 
        if (!calculate_next_machine_pwd_change(child->domain->name,
                                               &next_change)) {
+               DEBUG(10, ("calculate_next_machine_pwd_change failed\n"));
                return;
        }
 
+       DEBUG(10, ("calculate_next_machine_pwd_change returned %s\n",
+                  timeval_string(talloc_tos(), &next_change, false)));
+
+       if (!timeval_expired(&next_change)) {
+               DEBUG(10, ("Someone else has already changed the pw\n"));
+               goto done;
+       }
+
        if (!winbindd_can_contact_domain(child->domain)) {
                DEBUG(10,("machine_password_change_handler: Removing myself since I "
                          "do not have an incoming trust to domain %s\n",
@@ -1099,15 +1050,38 @@ static void machine_password_change_handler(struct event_context *ctx,
                                                   child->domain->name);
        TALLOC_FREE(frame);
 
+       DEBUG(10, ("machine_password_change_handler: "
+                  "trust_pw_find_change_and_store_it returned %s\n",
+                  nt_errstr(result)));
+
+       if (NT_STATUS_EQUAL(result, NT_STATUS_ACCESS_DENIED) ) {
+               DEBUG(3,("machine_password_change_handler: password set returned "
+                        "ACCESS_DENIED.  Maybe the trust account "
+                        "password was changed and we didn't know it. "
+                        "Killing connections to domain %s\n",
+                        child->domain->name));
+               TALLOC_FREE(child->domain->conn.netlogon_pipe);
+       }
+
+       if (!calculate_next_machine_pwd_change(child->domain->name,
+                                              &next_change)) {
+               DEBUG(10, ("calculate_next_machine_pwd_change failed\n"));
+               return;
+       }
+
+       DEBUG(10, ("calculate_next_machine_pwd_change returned %s\n",
+                  timeval_string(talloc_tos(), &next_change, false)));
+
        if (!NT_STATUS_IS_OK(result)) {
-               DEBUG(10,("machine_password_change_handler: "
-                       "failed to change machine password: %s\n",
-                        nt_errstr(result)));
-       } else {
-               DEBUG(10,("machine_password_change_handler: "
-                       "successfully changed machine password\n"));
+               struct timeval tmp;
+               /*
+                * In case of failure, give the DC a minute to recover
+                */
+               tmp = timeval_current_ofs(60, 0);
+               next_change = timeval_max(&next_change, &tmp);
        }
 
+done:
        child->machine_password_change_event = event_add_timed(winbind_event_context(), NULL,
                                                              next_change,
                                                              machine_password_change_handler,
@@ -1246,6 +1220,9 @@ bool winbindd_reinit_after_fork(const char *logfilename)
                                            logfilename))
                return false;
 
+       /* Stop zombies in children */
+       CatchChild();
+
        /* Don't handle the same messages as our parent. */
        messaging_deregister(winbind_messaging_context(),
                             MSG_SMB_CONF_UPDATED, NULL);
@@ -1312,10 +1289,22 @@ bool winbindd_reinit_after_fork(const char *logfilename)
        return true;
 }
 
+/*
+ * The one domain a child serves, set by fork_domain_child(); may be NULL.
+ */
+static struct winbindd_domain *child_domain;
+
+struct winbindd_domain *wb_child_domain(void)
+{
+       return child_domain;
+}
+
 static bool fork_domain_child(struct winbindd_child *child)
 {
        int fdpair[2];
        struct winbindd_cli_state state;
+       struct winbindd_request request;
+       struct winbindd_response response;
        struct winbindd_domain *primary_domain = NULL;
 
        if (child->domain) {
@@ -1324,6 +1313,7 @@ static bool fork_domain_child(struct winbindd_child *child)
        } else {
                DEBUG(10, ("fork_domain_child called without domain.\n"));
        }
+       child_domain = child->domain;
 
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) != 0) {
                DEBUG(0, ("Could not open child pipe: %s\n",
@@ -1333,7 +1323,8 @@ static bool fork_domain_child(struct winbindd_child *child)
 
        ZERO_STRUCT(state);
        state.pid = sys_getpid();
-       state.request = &state._request;
+       state.request = &request;
+       state.response = &response;
 
        child->pid = sys_fork();
 
@@ -1355,9 +1346,6 @@ static bool fork_domain_child(struct winbindd_child *child)
 
        DEBUG(10, ("Child process %d\n", (int)sys_getpid()));
 
-       /* Stop zombies in children */
-       CatchChild();
-
        state.sock = fdpair[0];
        close(fdpair[1]);
 
@@ -1477,9 +1465,24 @@ static bool fork_domain_child(struct winbindd_child *child)
 
                FD_ZERO(&r_fds);
                FD_ZERO(&w_fds);
+
+               if (state.sock < 0 || state.sock >= FD_SETSIZE) {
+                       TALLOC_FREE(frame);
+                       perror("EBADF");
+                       _exit(1);
+               }
+
                FD_SET(state.sock, &r_fds);
                maxfd = state.sock;
 
+               /*
+                * Initialize this high as event_add_to_select_args()
+                * uses a timeval_min() on this and next_event. Fix
+                * from Roel van Meer <rolek@alt001.com>.
+                */
+               t.tv_sec = 999999;
+               t.tv_usec = 0;
+
                event_add_to_select_args(winbind_event_context(), &now,
                                         &r_fds, &w_fds, &t, &maxfd);
                tp = get_timed_events_timeout(winbind_event_context(), &t);
@@ -1525,8 +1528,9 @@ static bool fork_domain_child(struct winbindd_child *child)
 
                DEBUG(4,("child daemon request %d\n", (int)state.request->cmd));
 
-               ZERO_STRUCT(state.response);
+               ZERO_STRUCTP(state.response);
                state.request->null_term = '\0';
+               state.mem_ctx = frame;
                child_process_request(child, &state);
 
                DEBUG(4, ("Finished processing child request %d\n",
@@ -1534,22 +1538,22 @@ static bool fork_domain_child(struct winbindd_child *child)
 
                SAFE_FREE(state.request->extra_data.data);
 
-               iov[0].iov_base = (void *)&state.response;
+               iov[0].iov_base = (void *)state.response;
                iov[0].iov_len = sizeof(struct winbindd_response);
                iov_count = 1;
 
-               if (state.response.length > sizeof(struct winbindd_response)) {
+               if (state.response->length > sizeof(struct winbindd_response)) {
                        iov[1].iov_base =
-                               (void *)state.response.extra_data.data;
-                       iov[1].iov_len = state.response.length-iov[0].iov_len;
+                               (void *)state.response->extra_data.data;
+                       iov[1].iov_len = state.response->length-iov[0].iov_len;
                        iov_count = 2;
                }
 
                DEBUG(10, ("Writing %d bytes to parent\n",
-                          (int)state.response.length));
+                          (int)state.response->length));
 
                if (write_data_iov(state.sock, iov, iov_count) !=
-                   state.response.length) {
+                   state.response->length) {
                        DEBUG(0, ("Could not write result\n"));
                        exit(1);
                }