Back-port of Volker's fix.
[samba.git] / source / nsswitch / winbindd_dual.c
index 8d475e6c9f9e95e7a179ecf7c71b8fc035e4c049..396eca7beb389c181f06b01166b4ed3599f082f4 100644 (file)
@@ -96,9 +96,13 @@ struct winbindd_async_request {
        struct winbindd_request *request;
        struct winbindd_response *response;
        void (*continuation)(void *private_data, BOOL success);
+       struct timed_event *reply_timeout_event;
+       pid_t child_pid; /* pid of the child we're waiting on. Used to detect
+                           a restart of the child (child->pid != child_pid). */
        void *private_data;
 };
 
+static void async_request_fail(struct winbindd_async_request *state);
 static void async_main_request_sent(void *private_data, BOOL success);
 static void async_request_sent(void *private_data, BOOL success);
 static void async_reply_recv(void *private_data, BOOL success);
@@ -124,6 +128,7 @@ void async_request(TALLOC_CTX *mem_ctx, struct winbindd_child *child,
 
        state->mem_ctx = mem_ctx;
        state->child = child;
+       state->reply_timeout_event = NULL;
        state->request = request;
        state->response = response;
        state->continuation = continuation;
@@ -143,10 +148,7 @@ static void async_main_request_sent(void *private_data, BOOL success)
 
        if (!success) {
                DEBUG(5, ("Could not send async request\n"));
-
-               state->response->length = sizeof(struct winbindd_response);
-               state->response->result = WINBINDD_ERROR;
-               state->continuation(state->private_data, False);
+               async_request_fail(state);
                return;
        }
 
@@ -160,17 +162,64 @@ static void async_main_request_sent(void *private_data, BOOL success)
                          async_request_sent, state);
 }
 
+/****************************************************************
+ Timed-event handler, fired if the child winbindd doesn't respond within
+ the reply timeout. Logs the pid and fails the request via async_reply_recv().
+****************************************************************/
+
+static void async_request_timeout_handler(struct event_context *ctx,
+                                       struct timed_event *te,
+                                       const struct timeval *now,
+                                       void *private_data)
+{
+       struct winbindd_async_request *state =
+               talloc_get_type_abort(private_data, struct winbindd_async_request);
+
+       DEBUG(0,("async_request_timeout_handler: child pid %u is not responding. "
+               "Closing connection to it.\n",
+               (unsigned int)state->child_pid ));
+
+       /* Deal with the reply - set to error (takes the !success path). */
+       async_reply_recv(private_data, False);
+}
+
+/**************************************************************
+ Common function called on both async send and recv fail. Dequeues the
+ request, cleans up the child and schedules the next request, reports error.
+**************************************************************/
+
+static void async_request_fail(struct winbindd_async_request *state)
+{
+       DLIST_REMOVE(state->child->requests, state);
+
+       TALLOC_FREE(state->reply_timeout_event); /* cancel any pending reply timeout */
+
+       SMB_ASSERT(state->child_pid != (pid_t)0); /* recorded in schedule_async_request() */
+
+       /* If the child is still the one we sent to (pid unchanged), send it SIGTERM. */
+       if (state->child->pid == state->child_pid) {
+               kill(state->child_pid, SIGTERM);
+
+               /* 
+                * Close the socket to the child (done by winbind_child_died()).
+                */
+               winbind_child_died(state->child_pid);
+       }
+
+       state->response->length = sizeof(struct winbindd_response);
+       state->response->result = WINBINDD_ERROR;
+       state->continuation(state->private_data, False);
+}
+
 static void async_request_sent(void *private_data_data, BOOL success)
 {
        struct winbindd_async_request *state =
                talloc_get_type_abort(private_data_data, struct winbindd_async_request);
 
        if (!success) {
-               DEBUG(5, ("Could not send async request\n"));
-
-               state->response->length = sizeof(struct winbindd_response);
-               state->response->result = WINBINDD_ERROR;
-               state->continuation(state->private_data, False);
+               DEBUG(5, ("Could not send async request to child pid %u\n",
+                       (unsigned int)state->child_pid ));
+               async_request_fail(state);
                return;
        }
 
@@ -180,6 +229,22 @@ static void async_request_sent(void *private_data_data, BOOL success)
                         &state->response->result,
                         sizeof(state->response->result),
                         async_reply_recv, state);
+
+       /* 
+        * Set up a timeout of 300 seconds for the response.
+        * If we don't get it close the child socket and
+        * report failure.
+        */
+
+       state->reply_timeout_event = event_add_timed(winbind_event_context(),
+                                                       NULL,
+                                                       timeval_current_ofs(300,0),
+                                                       "async_request_timeout",
+                                                       async_request_timeout_handler,
+                                                       state);
+       if (!state->reply_timeout_event) {
+               smb_panic("async_request_sent: failed to add timeout handler.\n");
+       }
 }
 
 static void async_reply_recv(void *private_data, BOOL success)
@@ -188,23 +253,23 @@ static void async_reply_recv(void *private_data, BOOL success)
                talloc_get_type_abort(private_data, struct winbindd_async_request);
        struct winbindd_child *child = state->child;
 
+       TALLOC_FREE(state->reply_timeout_event);
+
        state->response->length = sizeof(struct winbindd_response);
 
        if (!success) {
-               DEBUG(5, ("Could not receive async reply\n"));
+               DEBUG(5, ("Could not receive async reply from child pid %u\n",
+                       (unsigned int)state->child_pid ));
 
-               cache_cleanup_response(child->pid);
-               DLIST_REMOVE(child->requests, state);
-
-               state->response->result = WINBINDD_ERROR;
-               state->continuation(state->private_data, False);
+               cache_cleanup_response(state->child_pid);
+               async_request_fail(state);
                return;
        }
 
-       SMB_ASSERT(cache_retrieve_response(child->pid,
+       SMB_ASSERT(cache_retrieve_response(state->child_pid,
                                           state->response));
 
-       cache_cleanup_response(child->pid);
+       cache_cleanup_response(state->child_pid);
        
        DLIST_REMOVE(child->requests, state);
 
@@ -239,6 +304,9 @@ static void schedule_async_request(struct winbindd_child *child)
                return;
        }
 
+       /* Now we know who we're sending to - remember the pid. */
+       request->child_pid = child->pid;
+
        setup_async_write(&child->event, request->request,
                          sizeof(*request->request),
                          async_main_request_sent, request);
@@ -453,10 +521,14 @@ void winbind_child_died(pid_t pid)
        }
 
        if (child == NULL) {
-               DEBUG(0, ("Unknown child %d died!\n", pid));
+               DEBUG(5, ("Already reaped child %u died\n", (unsigned int)pid));
                return;
        }
 
+       /* This will be re-added in fork_domain_child() */
+
+       DLIST_REMOVE(children, child);
+       
        remove_fd_event(&child->event);
        close(child->event.fd);
        child->event.fd = 0;
@@ -649,9 +721,7 @@ static void account_lockout_policy_handler(struct event_context *ctx,
 
        DEBUG(10,("account_lockout_policy_handler called\n"));
 
-       if (child->lockout_policy_event) {
-               TALLOC_FREE(child->lockout_policy_event);
-       }
+       TALLOC_FREE(child->lockout_policy_event);
 
        methods = child->domain->methods;
 
@@ -815,6 +885,7 @@ static BOOL fork_domain_child(struct winbindd_child *child)
        int fdpair[2];
        struct winbindd_cli_state state;
        struct winbindd_domain *domain;
+       struct winbindd_domain *primary_domain = NULL;
 
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) != 0) {
                DEBUG(0, ("Could not open child pipe: %s\n",
@@ -825,9 +896,6 @@ static BOOL fork_domain_child(struct winbindd_child *child)
        ZERO_STRUCT(state);
        state.pid = sys_getpid();
 
-       /* Stop zombies */
-       CatchChild();
-
        /* Ensure we don't process messages whilst we're
           changing the disposition for the child. */
        message_block();
@@ -856,6 +924,9 @@ static BOOL fork_domain_child(struct winbindd_child *child)
 
        /* Child */
 
+       /* Stop zombies in children */
+       CatchChild();
+
        state.sock = fdpair[0];
        close(fdpair[1]);
 
@@ -894,13 +965,14 @@ static BOOL fork_domain_child(struct winbindd_child *child)
        }
 
        /* Ensure we have no pending check_online events other
-          than one for this domain. */
+          than one for this domain or the primary domain. */
 
        for (domain = domain_list(); domain; domain = domain->next) {
-               if (domain != child->domain) {
-                       if (domain->check_online_event) {
-                               TALLOC_FREE(domain->check_online_event);
-                       }
+               if (domain->primary) {
+                       primary_domain = domain;
+               }
+               if ((domain != child->domain) && !domain->primary) {
+                       TALLOC_FREE(domain->check_online_event);
                }
        }
 
@@ -916,6 +988,20 @@ static BOOL fork_domain_child(struct winbindd_child *child)
 
                set_domain_online_request(child->domain);
 
+               if (primary_domain != child->domain) {
+                       /* We need to talk to the primary
+                        * domain as well as the trusted
+                        * domain inside a trusted domain
+                        * child.
+                        * See the code in :
+                        * winbindd_dual_pam_auth_samlogon()
+                        * especially the calling of 
+                        * contact_domain = find_our_domain()
+                        * in the non-DC case for details.
+                        */
+                       set_domain_online_request(primary_domain);
+               }
+
                child->lockout_policy_event = event_add_timed(
                        winbind_event_context(), NULL, timeval_zero(),
                        "account_lockout_policy_handler",
@@ -935,6 +1021,11 @@ static BOOL fork_domain_child(struct winbindd_child *child)
                lp_TALLOC_FREE();
                main_loop_TALLOC_FREE();
 
+               /* check for signals */
+               winbind_check_sigterm(false);
+               winbind_check_sighup(override_logfile ? NULL :
+                       child->logfilename);
+
                run_events(winbind_event_context(), 0, NULL, NULL);
 
                GetTimeOfDay(&now);