s3: Fix a winbind race leading to 100% CPU
[samba.git] / source3 / winbindd / winbindd_dual.c
index a852727692b2e1f649d6ee957b9cbbedc200022a..2c0633c3eaa2673bb7c3a5c1db02d623a0ba5534 100644 (file)
@@ -37,6 +37,8 @@
 extern bool override_logfile;
 extern struct winbindd_methods cache_methods;
 
+static struct winbindd_child *children = NULL;
+
 /* Read some data from a client connection */
 
 static NTSTATUS child_read_request(struct winbindd_cli_state *state)
@@ -134,7 +136,7 @@ static void wb_child_request_trigger(struct tevent_req *req,
                req, struct wb_child_request_state);
        struct tevent_req *subreq;
 
-       if ((state->child->pid == 0) && (!fork_domain_child(state->child))) {
+       if ((state->child->sock == -1) && (!fork_domain_child(state->child))) {
                tevent_req_error(req, errno);
                return;
        }
@@ -164,6 +166,13 @@ static void wb_child_request_done(struct tevent_req *subreq)
        ret = wb_simple_trans_recv(subreq, state, &state->response, &err);
        TALLOC_FREE(subreq);
        if (ret == -1) {
+               /*
+                * The basic parent/child communication broke; close our
+                * socket so that the next request forks a new child.
+                */
+               close(state->child->sock);
+               state->child->sock = -1;
+               DLIST_REMOVE(children, state->child);
                tevent_req_error(req, err);
                return;
        }
@@ -502,7 +511,8 @@ void setup_child(struct winbindd_domain *domain, struct winbindd_child *child,
                          "logname == NULL");
        }
 
-       child->domain = NULL;
+       child->sock = -1;
+       child->domain = domain;
        child->table = table;
        child->queue = tevent_queue_create(NULL, "winbind_child");
        SMB_ASSERT(child->queue != NULL);
@@ -510,8 +520,6 @@ void setup_child(struct winbindd_domain *domain, struct winbindd_child *child,
        SMB_ASSERT(child->rpccli != NULL);
 }
 
-struct winbindd_child *children = NULL;
-
 void winbind_child_died(pid_t pid)
 {
        struct winbindd_child *child;
@@ -530,9 +538,6 @@ void winbind_child_died(pid_t pid)
        /* This will be re-added in fork_domain_child() */
 
        DLIST_REMOVE(children, child);
-
-       close(child->sock);
-       child->sock = -1;
        child->pid = 0;
 }
 
@@ -744,7 +749,7 @@ void winbind_msg_onlinestatus(struct messaging_context *msg_ctx,
        TALLOC_CTX *mem_ctx;
        const char *message;
        struct server_id *sender;
-       
+
        DEBUG(5,("winbind_msg_onlinestatus received.\n"));
 
        if (!data->data) {
@@ -757,7 +762,7 @@ void winbind_msg_onlinestatus(struct messaging_context *msg_ctx,
        if (mem_ctx == NULL) {
                return;
        }
-       
+
        message = collect_onlinestatus(mem_ctx);
        if (message == NULL) {
                talloc_destroy(mem_ctx);
@@ -1215,6 +1220,9 @@ bool winbindd_reinit_after_fork(const char *logfilename)
                                            logfilename))
                return false;
 
+       /* Stop zombies in children */
+       CatchChild();
+
        /* Don't handle the same messages as our parent. */
        messaging_deregister(winbind_messaging_context(),
                             MSG_SMB_CONF_UPDATED, NULL);
@@ -1338,9 +1346,6 @@ static bool fork_domain_child(struct winbindd_child *child)
 
        DEBUG(10, ("Child process %d\n", (int)sys_getpid()));
 
-       /* Stop zombies in children */
-       CatchChild();
-
        state.sock = fdpair[0];
        close(fdpair[1]);
 
@@ -1460,9 +1465,24 @@ static bool fork_domain_child(struct winbindd_child *child)
 
                FD_ZERO(&r_fds);
                FD_ZERO(&w_fds);
+
+               if (state.sock < 0 || state.sock >= FD_SETSIZE) {
+                       TALLOC_FREE(frame);
+                       perror("EBADF");
+                       _exit(1);
+               }
+
                FD_SET(state.sock, &r_fds);
                maxfd = state.sock;
 
+               /*
+                * Initialize this high as event_add_to_select_args()
+                * uses a timeval_min() on this and next_event. Fix
+                * from Roel van Meer <rolek@alt001.com>.
+                */
+               t.tv_sec = 999999;
+               t.tv_usec = 0;
+
                event_add_to_select_args(winbind_event_context(), &now,
                                         &r_fds, &w_fds, &t, &maxfd);
                tp = get_timed_events_timeout(winbind_event_context(), &t);