Back-port of Volker's fix.
[samba.git] / source / nsswitch / winbindd_dual.c
index 54525f8da1f64dd9596953118f97fdbbdfd60617..396eca7beb389c181f06b01166b4ed3599f082f4 100644 (file)
@@ -97,9 +97,12 @@ struct winbindd_async_request {
        struct winbindd_response *response;
        void (*continuation)(void *private_data, BOOL success);
        struct timed_event *reply_timeout_event;
+       pid_t child_pid; /* pid of the child we're waiting on. Used to detect
+                           a restart of the child (child->pid != child_pid). */
        void *private_data;
 };
 
+static void async_request_fail(struct winbindd_async_request *state);
 static void async_main_request_sent(void *private_data, BOOL success);
 static void async_request_sent(void *private_data, BOOL success);
 static void async_reply_recv(void *private_data, BOOL success);
@@ -125,6 +128,7 @@ void async_request(TALLOC_CTX *mem_ctx, struct winbindd_child *child,
 
        state->mem_ctx = mem_ctx;
        state->child = child;
+       state->reply_timeout_event = NULL;
        state->request = request;
        state->response = response;
        state->continuation = continuation;
@@ -144,10 +148,7 @@ static void async_main_request_sent(void *private_data, BOOL success)
 
        if (!success) {
                DEBUG(5, ("Could not send async request\n"));
-
-               state->response->length = sizeof(struct winbindd_response);
-               state->response->result = WINBINDD_ERROR;
-               state->continuation(state->private_data, False);
+               async_request_fail(state);
                return;
        }
 
@@ -174,34 +175,51 @@ static void async_request_timeout_handler(struct event_context *ctx,
        struct winbindd_async_request *state =
                talloc_get_type_abort(private_data, struct winbindd_async_request);
 
-       /* Deal with the reply - set to error. */
+       DEBUG(0,("async_request_timeout_handler: child pid %u is not responding. "
+               "Closing connection to it.\n",
+               state->child_pid ));
 
+       /* Deal with the reply - set to error. */
        async_reply_recv(private_data, False);
+}
 
-       /* 
-        * Close the socket to the child. Should cause the
-        * child to exit.
-        */
+/**************************************************************
+ Common function called on both async send and recv fail.
+ Cleans up the child and schedules the next request.
+**************************************************************/
 
-       DEBUG(0,("async_request_timeout_handler: child pid %u is not responding. "
-               "Closing connection to it.\n",
-               state->child->pid ));
+static void async_request_fail(struct winbindd_async_request *state)
+{
+       DLIST_REMOVE(state->child->requests, state);
+
+       TALLOC_FREE(state->reply_timeout_event);
 
-       winbind_child_died(state->child->pid);
+       SMB_ASSERT(state->child_pid != (pid_t)0);
+
+       /* If not already reaped, send kill signal to child. */
+       if (state->child->pid == state->child_pid) {
+               kill(state->child_pid, SIGTERM);
+
+               /* 
+                * Close the socket to the child.
+                */
+               winbind_child_died(state->child_pid);
+       }
+
+       state->response->length = sizeof(struct winbindd_response);
+       state->response->result = WINBINDD_ERROR;
+       state->continuation(state->private_data, False);
 }
 
 static void async_request_sent(void *private_data_data, BOOL success)
 {
-       uint32_t timeout = 30;
        struct winbindd_async_request *state =
                talloc_get_type_abort(private_data_data, struct winbindd_async_request);
 
        if (!success) {
-               DEBUG(5, ("Could not send async request\n"));
-
-               state->response->length = sizeof(struct winbindd_response);
-               state->response->result = WINBINDD_ERROR;
-               state->continuation(state->private_data, False);
+               DEBUG(5, ("Could not send async request to child pid %u\n",
+                       (unsigned int)state->child_pid ));
+               async_request_fail(state);
                return;
        }
 
@@ -213,25 +231,14 @@ static void async_request_sent(void *private_data_data, BOOL success)
                         async_reply_recv, state);
 
        /* 
-        * Normal timeouts are 30s, but auth requests may take a long
-        * time to timeout.
-        */
-
-       if (state->request->cmd == WINBINDD_PAM_AUTH ||
-                       state->request->cmd == WINBINDD_PAM_AUTH_CRAP ) {
-
-               timeout = 300;
-       }
-
-       /* 
-        * Set up a timeout of 1 minute for the response.
+        * Set up a timeout of 300 seconds for the response.
         * If we don't get it close the child socket and
         * report failure.
         */
 
        state->reply_timeout_event = event_add_timed(winbind_event_context(),
                                                        NULL,
-                                                       timeval_current_ofs(timeout,0),
+                                                       timeval_current_ofs(300,0),
                                                        "async_request_timeout",
                                                        async_request_timeout_handler,
                                                        state);
@@ -246,27 +253,23 @@ static void async_reply_recv(void *private_data, BOOL success)
                talloc_get_type_abort(private_data, struct winbindd_async_request);
        struct winbindd_child *child = state->child;
 
-       if (state->reply_timeout_event) {
-               TALLOC_FREE(state->reply_timeout_event);
-       }
+       TALLOC_FREE(state->reply_timeout_event);
 
        state->response->length = sizeof(struct winbindd_response);
 
        if (!success) {
-               DEBUG(5, ("Could not receive async reply\n"));
+               DEBUG(5, ("Could not receive async reply from child pid %u\n",
+                       (unsigned int)state->child_pid ));
 
-               cache_cleanup_response(child->pid);
-               DLIST_REMOVE(child->requests, state);
-
-               state->response->result = WINBINDD_ERROR;
-               state->continuation(state->private_data, False);
+               cache_cleanup_response(state->child_pid);
+               async_request_fail(state);
                return;
        }
 
-       SMB_ASSERT(cache_retrieve_response(child->pid,
+       SMB_ASSERT(cache_retrieve_response(state->child_pid,
                                           state->response));
 
-       cache_cleanup_response(child->pid);
+       cache_cleanup_response(state->child_pid);
        
        DLIST_REMOVE(child->requests, state);
 
@@ -301,6 +304,9 @@ static void schedule_async_request(struct winbindd_child *child)
                return;
        }
 
+       /* Now we know who we're sending to - remember the pid. */
+       request->child_pid = child->pid;
+
        setup_async_write(&child->event, request->request,
                          sizeof(*request->request),
                          async_main_request_sent, request);
@@ -515,10 +521,14 @@ void winbind_child_died(pid_t pid)
        }
 
        if (child == NULL) {
-               DEBUG(0, ("Unknown child %d died!\n", pid));
+               DEBUG(5, ("Already reaped child %u died\n", (unsigned int)pid));
                return;
        }
 
+       /* This will be re-added in fork_domain_child() */
+
+       DLIST_REMOVE(children, child);
+       
        remove_fd_event(&child->event);
        close(child->event.fd);
        child->event.fd = 0;
@@ -540,11 +550,8 @@ void winbindd_flush_negative_conn_cache(struct winbindd_domain *domain)
 
 /* Set our domains as offline and forward the offline message to our children. */
 
-void winbind_msg_offline(struct messaging_context *msg_ctx,
-                        void *private_data,
-                        uint32_t msg_type,
-                        struct server_id server_id,
-                        DATA_BLOB *data)
+void winbind_msg_offline(int msg_type, struct process_id src,
+                        void *buf, size_t len, void *private_data)
 {
        struct winbindd_child *child;
        struct winbindd_domain *domain;
@@ -569,6 +576,21 @@ void winbind_msg_offline(struct messaging_context *msg_ctx,
                }
                DEBUG(5,("winbind_msg_offline: marking %s offline.\n", domain->name));
                set_domain_offline(domain);
+
+               /* Send an offline message to the idmap child when our
+                  primary domain goes offline */
+
+               if ( domain->primary ) {
+                       struct winbindd_child *idmap = idmap_child();
+
+                       if ( idmap->pid != 0 ) {
+                               message_send_pid(pid_to_procid(idmap->pid), 
+                                                MSG_WINBIND_OFFLINE, 
+                                                domain->name, 
+                                                strlen(domain->name)+1, 
+                                                False);
+                       }                       
+               }
        }
 
        for (child = children; child != NULL; child = child->next) {
@@ -589,20 +611,15 @@ void winbind_msg_offline(struct messaging_context *msg_ctx,
                DEBUG(10,("winbind_msg_offline: sending message to pid %u for domain %s.\n",
                        (unsigned int)child->pid, domain->name ));
 
-               messaging_send_buf(msg_ctx, pid_to_procid(child->pid),
-                                  MSG_WINBIND_OFFLINE,
-                                  (uint8 *)child->domain->name,
-                                  strlen(child->domain->name)+1);
+               message_send_pid(pid_to_procid(child->pid), MSG_WINBIND_OFFLINE, child->domain->name,
+                       strlen(child->domain->name)+1, False);
        }
 }
 
 /* Set our domains as online and forward the online message to our children. */
 
-void winbind_msg_online(struct messaging_context *msg_ctx,
-                       void *private_data,
-                       uint32_t msg_type,
-                       struct server_id server_id,
-                       DATA_BLOB *data)
+void winbind_msg_online(int msg_type, struct process_id src,
+                       void *buf, size_t len, void *private_data)
 {
        struct winbindd_child *child;
        struct winbindd_domain *domain;
@@ -637,11 +654,11 @@ void winbind_msg_online(struct messaging_context *msg_ctx,
                        struct winbindd_child *idmap = idmap_child();
                        
                        if ( idmap->pid != 0 ) {
-                               messaging_send_buf(msg_ctx,
-                                                  pid_to_procid(idmap->pid), 
-                                                  MSG_WINBIND_ONLINE,
-                                                  (uint8 *)domain->name,
-                                                  strlen(domain->name)+1);
+                               message_send_pid(pid_to_procid(idmap->pid), 
+                                                MSG_WINBIND_ONLINE,
+                                                domain->name,
+                                                strlen(domain->name)+1, 
+                                                False);
                        }
                        
                }
@@ -664,19 +681,14 @@ void winbind_msg_online(struct messaging_context *msg_ctx,
                DEBUG(10,("winbind_msg_online: sending message to pid %u for domain %s.\n",
                        (unsigned int)child->pid, child->domain->name ));
 
-               messaging_send_buf(msg_ctx, pid_to_procid(child->pid),
-                                  MSG_WINBIND_ONLINE,
-                                  (uint8 *)child->domain->name,
-                                  strlen(child->domain->name)+1);
+               message_send_pid(pid_to_procid(child->pid), MSG_WINBIND_ONLINE, child->domain->name,
+                       strlen(child->domain->name)+1, False);
        }
 }
 
 /* Forward the online/offline messages to our children. */
-void winbind_msg_onlinestatus(struct messaging_context *msg_ctx,
-                             void *private_data,
-                             uint32_t msg_type,
-                             struct server_id server_id,
-                             DATA_BLOB *data)
+void winbind_msg_onlinestatus(int msg_type, struct process_id src,
+                             void *buf, size_t len, void *private_data)
 {
        struct winbindd_child *child;
 
@@ -687,10 +699,8 @@ void winbind_msg_onlinestatus(struct messaging_context *msg_ctx,
                        DEBUG(10,("winbind_msg_onlinestatus: "
                                  "sending message to pid %u of primary domain.\n",
                                  (unsigned int)child->pid));
-                       messaging_send_buf(msg_ctx, pid_to_procid(child->pid), 
-                                          MSG_WINBIND_ONLINESTATUS,
-                                          (uint8 *)data->data,
-                                          data->length);
+                       message_send_pid(pid_to_procid(child->pid), 
+                                        MSG_WINBIND_ONLINESTATUS, buf, len, False);
                        break;
                }
        }
@@ -711,17 +721,7 @@ static void account_lockout_policy_handler(struct event_context *ctx,
 
        DEBUG(10,("account_lockout_policy_handler called\n"));
 
-       if (child->lockout_policy_event) {
-               TALLOC_FREE(child->lockout_policy_event);
-       }
-
-       if ( !winbindd_can_contact_domain( child->domain ) ) {
-               DEBUG(10,("account_lockout_policy_handler: Removing myself since I "
-                         "do not have an incoming trust to domain %s\n", 
-                         child->domain->name));
-
-               return;         
-       }
+       TALLOC_FREE(child->lockout_policy_event);
 
        methods = child->domain->methods;
 
@@ -748,16 +748,13 @@ static void account_lockout_policy_handler(struct event_context *ctx,
 
 /* Deal with a request to go offline. */
 
-static void child_msg_offline(struct messaging_context *msg,
-                             void *private_data,
-                             uint32_t msg_type,
-                             struct server_id server_id,
-                             DATA_BLOB *data)
+static void child_msg_offline(int msg_type, struct process_id src,
+                             void *buf, size_t len, void *private_data)
 {
        struct winbindd_domain *domain;
-       const char *domainname = (const char *)data->data;
+       const char *domainname = (const char *)buf;
 
-       if (data->data == NULL || data->length == 0) {
+       if (buf == NULL || len == 0) {
                return;
        }
 
@@ -768,6 +765,12 @@ static void child_msg_offline(struct messaging_context *msg,
                return;
        }
 
+       /* Set our global state as offline. */
+       if (!set_global_winbindd_state_offline()) {
+               DEBUG(10,("child_msg_offline: offline request failed.\n"));
+               return;
+       }
+
        /* Mark the requested domain offline. */
 
        for (domain = domain_list(); domain; domain = domain->next) {
@@ -783,16 +786,13 @@ static void child_msg_offline(struct messaging_context *msg,
 
 /* Deal with a request to go online. */
 
-static void child_msg_online(struct messaging_context *msg,
-                            void *private_data,
-                            uint32_t msg_type,
-                            struct server_id server_id,
-                            DATA_BLOB *data)
+static void child_msg_online(int msg_type, struct process_id src,
+                            void *buf, size_t len, void *private_data)
 {
        struct winbindd_domain *domain;
-       const char *domainname = (const char *)data->data;
+       const char *domainname = (const char *)buf;
 
-       if (data->data == NULL || data->length == 0) {
+       if (buf == NULL || len == 0) {
                return;
        }
 
@@ -848,23 +848,20 @@ static const char *collect_onlinestatus(TALLOC_CTX *mem_ctx)
        return buf;
 }
 
-static void child_msg_onlinestatus(struct messaging_context *msg_ctx,
-                                  void *private_data,
-                                  uint32_t msg_type,
-                                  struct server_id server_id,
-                                  DATA_BLOB *data)
+static void child_msg_onlinestatus(int msg_type, struct process_id src,
+                                  void *buf, size_t len, void *private_data)
 {
        TALLOC_CTX *mem_ctx;
        const char *message;
-       struct server_id *sender;
+       struct process_id *sender;
        
        DEBUG(5,("winbind_msg_onlinestatus received.\n"));
 
-       if (!data->data) {
+       if (!buf) {
                return;
        }
 
-       sender = (struct server_id *)data->data;
+       sender = (struct process_id *)buf;
 
        mem_ctx = talloc_init("winbind_msg_onlinestatus");
        if (mem_ctx == NULL) {
@@ -877,8 +874,8 @@ static void child_msg_onlinestatus(struct messaging_context *msg_ctx,
                return;
        }
 
-       messaging_send_buf(msg_ctx, *sender, MSG_WINBIND_ONLINESTATUS, 
-                          (uint8 *)message, strlen(message) + 1);
+       message_send_pid(*sender, MSG_WINBIND_ONLINESTATUS, 
+                        message, strlen(message) + 1, True);
 
        talloc_destroy(mem_ctx);
 }
@@ -888,6 +885,7 @@ static BOOL fork_domain_child(struct winbindd_child *child)
        int fdpair[2];
        struct winbindd_cli_state state;
        struct winbindd_domain *domain;
+       struct winbindd_domain *primary_domain = NULL;
 
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) != 0) {
                DEBUG(0, ("Could not open child pipe: %s\n",
@@ -898,13 +896,15 @@ static BOOL fork_domain_child(struct winbindd_child *child)
        ZERO_STRUCT(state);
        state.pid = sys_getpid();
 
-       /* Stop zombies */
-       CatchChild();
+       /* Ensure we don't process messages whilst we're
+          changing the disposition for the child. */
+       message_block();
 
        child->pid = sys_fork();
 
        if (child->pid == -1) {
                DEBUG(0, ("Could not fork: %s\n", strerror(errno)));
+               message_unblock();
                return False;
        }
 
@@ -917,11 +917,16 @@ static BOOL fork_domain_child(struct winbindd_child *child)
                child->event.flags = 0;
                child->requests = NULL;
                add_fd_event(&child->event);
+               /* We're ok with online/offline messages now. */
+               message_unblock();
                return True;
        }
 
        /* Child */
 
+       /* Stop zombies in children */
+       CatchChild();
+
        state.sock = fdpair[0];
        close(fdpair[1]);
 
@@ -938,32 +943,21 @@ static BOOL fork_domain_child(struct winbindd_child *child)
                reopen_logs();
        }
 
-       /*
-        * For clustering, we need to re-init our ctdbd connection after the
-        * fork
-        */
-       if (!NT_STATUS_IS_OK(messaging_reinit(winbind_messaging_context())))
-               exit(1);
-
        /* Don't handle the same messages as our parent. */
-       messaging_deregister(winbind_messaging_context(),
-                            MSG_SMB_CONF_UPDATED, NULL);
-       messaging_deregister(winbind_messaging_context(),
-                            MSG_SHUTDOWN, NULL);
-       messaging_deregister(winbind_messaging_context(),
-                            MSG_WINBIND_OFFLINE, NULL);
-       messaging_deregister(winbind_messaging_context(),
-                            MSG_WINBIND_ONLINE, NULL);
-       messaging_deregister(winbind_messaging_context(),
-                            MSG_WINBIND_ONLINESTATUS, NULL);
+       message_deregister(MSG_SMB_CONF_UPDATED);
+       message_deregister(MSG_SHUTDOWN);
+       message_deregister(MSG_WINBIND_OFFLINE);
+       message_deregister(MSG_WINBIND_ONLINE);
+       message_deregister(MSG_WINBIND_ONLINESTATUS);
+
+       /* The child is ok with online/offline messages now. */
+       message_unblock();
 
        /* Handle online/offline messages. */
-       messaging_register(winbind_messaging_context(), NULL,
-                          MSG_WINBIND_OFFLINE, child_msg_offline);
-       messaging_register(winbind_messaging_context(), NULL,
-                          MSG_WINBIND_ONLINE, child_msg_online);
-       messaging_register(winbind_messaging_context(), NULL,
-                          MSG_WINBIND_ONLINESTATUS, child_msg_onlinestatus);
+       message_register(MSG_WINBIND_OFFLINE, child_msg_offline, NULL);
+       message_register(MSG_WINBIND_ONLINE, child_msg_online, NULL);
+       message_register(MSG_WINBIND_ONLINESTATUS, child_msg_onlinestatus,
+                        NULL);
 
        if ( child->domain ) {
                child->domain->startup = True;
@@ -971,13 +965,14 @@ static BOOL fork_domain_child(struct winbindd_child *child)
        }
 
        /* Ensure we have no pending check_online events other
-          than one for this domain. */
+          than one for this domain or the primary domain. */
 
        for (domain = domain_list(); domain; domain = domain->next) {
-               if (domain != child->domain) {
-                       if (domain->check_online_event) {
-                               TALLOC_FREE(domain->check_online_event);
-                       }
+               if (domain->primary) {
+                       primary_domain = domain;
+               }
+               if ((domain != child->domain) && !domain->primary) {
+                       TALLOC_FREE(domain->check_online_event);
                }
        }
 
@@ -993,6 +988,20 @@ static BOOL fork_domain_child(struct winbindd_child *child)
 
                set_domain_online_request(child->domain);
 
+               if (primary_domain != child->domain) {
+                       /* We need to talk to the primary
+                        * domain as well as the trusted
+                        * domain inside a trusted domain
+                        * child.
+                        * See the code in :
+                        * winbindd_dual_pam_auth_samlogon()
+                        * especially the calling of 
+                        * contact_domain = find_our_domain()
+                        * in the non-DC case for details.
+                        */
+                       set_domain_online_request(primary_domain);
+               }
+
                child->lockout_policy_event = event_add_timed(
                        winbind_event_context(), NULL, timeval_zero(),
                        "account_lockout_policy_handler",
@@ -1012,6 +1021,11 @@ static BOOL fork_domain_child(struct winbindd_child *child)
                lp_TALLOC_FREE();
                main_loop_TALLOC_FREE();
 
+               /* check for signals */
+               winbind_check_sigterm(false);
+               winbind_check_sighup(override_logfile ? NULL :
+                       child->logfilename);
+
                run_events(winbind_event_context(), 0, NULL, NULL);
 
                GetTimeOfDay(&now);
@@ -1032,7 +1046,7 @@ static BOOL fork_domain_child(struct winbindd_child *child)
 
                /* Handle messages */
 
-               message_dispatch(winbind_messaging_context());
+               message_dispatch();
 
                FD_ZERO(&read_fds);
                FD_SET(state.sock, &read_fds);