Back-port of Volkers fix.
[samba.git] / source / nsswitch / winbindd_dual.c
index cdfa1e049320f9c7b9faf1a09e86266b531fded9..396eca7beb389c181f06b01166b4ed3599f082f4 100644 (file)
@@ -34,6 +34,8 @@
 #undef DBGC_CLASS
 #define DBGC_CLASS DBGC_WINBIND
 
+extern BOOL override_logfile;
+
 /* Read some data from a client connection */
 
 static void child_read_request(struct winbindd_cli_state *state)
@@ -46,31 +48,31 @@ static void child_read_request(struct winbindd_cli_state *state)
                        sizeof(state->request));
 
        if (len != sizeof(state->request)) {
-               DEBUG(0, ("Got invalid request length: %d\n", (int)len));
+               DEBUG(len > 0 ? 0 : 3, ("Got invalid request length: %d\n", (int)len));
                state->finished = True;
                return;
        }
 
        if (state->request.extra_len == 0) {
-               state->request.extra_data = NULL;
+               state->request.extra_data.data = NULL;
                return;
        }
 
        DEBUG(10, ("Need to read %d extra bytes\n", (int)state->request.extra_len));
 
-       state->request.extra_data =
+       state->request.extra_data.data =
                SMB_MALLOC_ARRAY(char, state->request.extra_len + 1);
 
-       if (state->request.extra_data == NULL) {
+       if (state->request.extra_data.data == NULL) {
                DEBUG(0, ("malloc failed\n"));
                state->finished = True;
                return;
        }
 
        /* Ensure null termination */
-       state->request.extra_data[state->request.extra_len] = '\0';
+       state->request.extra_data.data[state->request.extra_len] = '\0';
 
-       len = read_data(state->sock, state->request.extra_data,
+       len = read_data(state->sock, state->request.extra_data.data,
                        state->request.extra_len);
 
        if (len != state->request.extra_len) {
@@ -94,9 +96,13 @@ struct winbindd_async_request {
        struct winbindd_request *request;
        struct winbindd_response *response;
        void (*continuation)(void *private_data, BOOL success);
+       struct timed_event *reply_timeout_event;
+       pid_t child_pid; /* pid of the child we're waiting on. Used to detect
+                           a restart of the child (child->pid != child_pid). */
        void *private_data;
 };
 
+static void async_request_fail(struct winbindd_async_request *state);
 static void async_main_request_sent(void *private_data, BOOL success);
 static void async_request_sent(void *private_data, BOOL success);
 static void async_reply_recv(void *private_data, BOOL success);
@@ -108,7 +114,7 @@ void async_request(TALLOC_CTX *mem_ctx, struct winbindd_child *child,
                   void (*continuation)(void *private_data, BOOL success),
                   void *private_data)
 {
-       struct winbindd_async_request *state, *tmp;
+       struct winbindd_async_request *state;
 
        SMB_ASSERT(continuation != NULL);
 
@@ -122,12 +128,13 @@ void async_request(TALLOC_CTX *mem_ctx, struct winbindd_child *child,
 
        state->mem_ctx = mem_ctx;
        state->child = child;
+       state->reply_timeout_event = NULL;
        state->request = request;
        state->response = response;
        state->continuation = continuation;
        state->private_data = private_data;
 
-       DLIST_ADD_END(child->requests, state, tmp);
+       DLIST_ADD_END(child->requests, state, struct winbindd_async_request *);
 
        schedule_async_request(child);
 
@@ -141,10 +148,7 @@ static void async_main_request_sent(void *private_data, BOOL success)
 
        if (!success) {
                DEBUG(5, ("Could not send async request\n"));
-
-               state->response->length = sizeof(struct winbindd_response);
-               state->response->result = WINBINDD_ERROR;
-               state->continuation(state->private_data, False);
+               async_request_fail(state);
                return;
        }
 
@@ -153,22 +157,69 @@ static void async_main_request_sent(void *private_data, BOOL success)
                return;
        }
 
-       setup_async_write(&state->child->event, state->request->extra_data,
+       setup_async_write(&state->child->event, state->request->extra_data.data,
                          state->request->extra_len,
                          async_request_sent, state);
 }
 
+/****************************************************************
+ Handler triggered if the child winbindd doesn't respond within
+ a given timeout.
+****************************************************************/
+
+static void async_request_timeout_handler(struct event_context *ctx,
+                                       struct timed_event *te,
+                                       const struct timeval *now,
+                                       void *private_data)
+{
+       struct winbindd_async_request *state =
+               talloc_get_type_abort(private_data, struct winbindd_async_request);
+
+       DEBUG(0,("async_request_timeout_handler: child pid %u is not responding. "
+               "Closing connection to it.\n",
+               state->child_pid ));
+
+       /* Deal with the reply - set to error. */
+       async_reply_recv(private_data, False);
+}
+
+/**************************************************************
+ Common function called on both async send and recv fail.
+ Cleans up the child and schedules the next request.
+**************************************************************/
+
+static void async_request_fail(struct winbindd_async_request *state)
+{
+       DLIST_REMOVE(state->child->requests, state);
+
+       TALLOC_FREE(state->reply_timeout_event);
+
+       SMB_ASSERT(state->child_pid != (pid_t)0);
+
+       /* If not already reaped, send kill signal to child. */
+       if (state->child->pid == state->child_pid) {
+               kill(state->child_pid, SIGTERM);
+
+               /* 
+                * Close the socket to the child.
+                */
+               winbind_child_died(state->child_pid);
+       }
+
+       state->response->length = sizeof(struct winbindd_response);
+       state->response->result = WINBINDD_ERROR;
+       state->continuation(state->private_data, False);
+}
+
 static void async_request_sent(void *private_data_data, BOOL success)
 {
        struct winbindd_async_request *state =
                talloc_get_type_abort(private_data_data, struct winbindd_async_request);
 
        if (!success) {
-               DEBUG(5, ("Could not send async request\n"));
-
-               state->response->length = sizeof(struct winbindd_response);
-               state->response->result = WINBINDD_ERROR;
-               state->continuation(state->private_data, False);
+               DEBUG(5, ("Could not send async request to child pid %u\n",
+                       (unsigned int)state->child_pid ));
+               async_request_fail(state);
                return;
        }
 
@@ -178,6 +229,22 @@ static void async_request_sent(void *private_data_data, BOOL success)
                         &state->response->result,
                         sizeof(state->response->result),
                         async_reply_recv, state);
+
+       /* 
+        * Set up a timeout of 300 seconds for the response.
+        * If we don't get it close the child socket and
+        * report failure.
+        */
+
+       state->reply_timeout_event = event_add_timed(winbind_event_context(),
+                                                       NULL,
+                                                       timeval_current_ofs(300,0),
+                                                       "async_request_timeout",
+                                                       async_request_timeout_handler,
+                                                       state);
+       if (!state->reply_timeout_event) {
+               smb_panic("async_request_sent: failed to add timeout handler.\n");
+       }
 }
 
 static void async_reply_recv(void *private_data, BOOL success)
@@ -186,18 +253,23 @@ static void async_reply_recv(void *private_data, BOOL success)
                talloc_get_type_abort(private_data, struct winbindd_async_request);
        struct winbindd_child *child = state->child;
 
+       TALLOC_FREE(state->reply_timeout_event);
+
        state->response->length = sizeof(struct winbindd_response);
 
        if (!success) {
-               DEBUG(5, ("Could not receive async reply\n"));
-               state->response->result = WINBINDD_ERROR;
+               DEBUG(5, ("Could not receive async reply from child pid %u\n",
+                       (unsigned int)state->child_pid ));
+
+               cache_cleanup_response(state->child_pid);
+               async_request_fail(state);
                return;
        }
 
-       SMB_ASSERT(cache_retrieve_response(child->pid,
+       SMB_ASSERT(cache_retrieve_response(state->child_pid,
                                           state->response));
 
-       cache_cleanup_response(child->pid);
+       cache_cleanup_response(state->child_pid);
        
        DLIST_REMOVE(child->requests, state);
 
@@ -232,11 +304,13 @@ static void schedule_async_request(struct winbindd_child *child)
                return;
        }
 
+       /* Now we know who we're sending to - remember the pid. */
+       request->child_pid = child->pid;
+
        setup_async_write(&child->event, request->request,
                          sizeof(*request->request),
                          async_main_request_sent, request);
 
-       talloc_destroy(child->mem_ctx);
        return;
 }
 
@@ -345,6 +419,7 @@ static struct winbindd_child_dispatch_table child_dispatch_table[] = {
        
        { WINBINDD_LOOKUPSID,            winbindd_dual_lookupsid,             "LOOKUPSID" },
        { WINBINDD_LOOKUPNAME,           winbindd_dual_lookupname,            "LOOKUPNAME" },
+       { WINBINDD_LOOKUPRIDS,           winbindd_dual_lookuprids,            "LOOKUPRIDS" },
        { WINBINDD_LIST_TRUSTDOM,        winbindd_dual_list_trusted_domains,  "LIST_TRUSTDOM" },
        { WINBINDD_INIT_CONNECTION,      winbindd_dual_init_connection,       "INIT_CONNECTION" },
        { WINBINDD_GETDCNAME,            winbindd_dual_getdcname,             "GETDCNAME" },
@@ -352,19 +427,29 @@ static struct winbindd_child_dispatch_table child_dispatch_table[] = {
        { WINBINDD_PAM_AUTH,             winbindd_dual_pam_auth,              "PAM_AUTH" },
        { WINBINDD_PAM_AUTH_CRAP,        winbindd_dual_pam_auth_crap,         "AUTH_CRAP" },
        { WINBINDD_PAM_LOGOFF,           winbindd_dual_pam_logoff,            "PAM_LOGOFF" },
+       { WINBINDD_PAM_CHNG_PSWD_AUTH_CRAP,winbindd_dual_pam_chng_pswd_auth_crap,"CHNG_PSWD_AUTH_CRAP" },
+       { WINBINDD_PAM_CHAUTHTOK,        winbindd_dual_pam_chauthtok,         "PAM_CHAUTHTOK" },
        { WINBINDD_CHECK_MACHACC,        winbindd_dual_check_machine_acct,    "CHECK_MACHACC" },
        { WINBINDD_DUAL_SID2UID,         winbindd_dual_sid2uid,               "DUAL_SID2UID" },
        { WINBINDD_DUAL_SID2GID,         winbindd_dual_sid2gid,               "DUAL_SID2GID" },
+#if 0   /* DISABLED until we fix the interface in Samba 3.0.26 --jerry */
+       { WINBINDD_DUAL_SIDS2XIDS,       winbindd_dual_sids2xids,             "DUAL_SIDS2XIDS" },
+#endif  /* end DISABLED */
+       { WINBINDD_DUAL_UID2SID,         winbindd_dual_uid2sid,               "DUAL_UID2SID" },
+       { WINBINDD_DUAL_GID2SID,         winbindd_dual_gid2sid,               "DUAL_GID2SID" },
        { WINBINDD_DUAL_UID2NAME,        winbindd_dual_uid2name,              "DUAL_UID2NAME" },
        { WINBINDD_DUAL_NAME2UID,        winbindd_dual_name2uid,              "DUAL_NAME2UID" },
        { WINBINDD_DUAL_GID2NAME,        winbindd_dual_gid2name,              "DUAL_GID2NAME" },
        { WINBINDD_DUAL_NAME2GID,        winbindd_dual_name2gid,              "DUAL_NAME2GID" },
-       { WINBINDD_DUAL_IDMAPSET,        winbindd_dual_idmapset,              "DUAL_IDMAPSET" },
+       { WINBINDD_DUAL_SET_MAPPING,     winbindd_dual_set_mapping,           "DUAL_SET_MAPPING" },
+       { WINBINDD_DUAL_SET_HWM,         winbindd_dual_set_hwm,               "DUAL_SET_HWMS" },
+       { WINBINDD_DUAL_DUMP_MAPS,       winbindd_dual_dump_maps,             "DUAL_DUMP_MAPS" },
        { WINBINDD_DUAL_USERINFO,        winbindd_dual_userinfo,              "DUAL_USERINFO" },
        { WINBINDD_ALLOCATE_UID,         winbindd_dual_allocate_uid,          "ALLOCATE_UID" },
        { WINBINDD_ALLOCATE_GID,         winbindd_dual_allocate_gid,          "ALLOCATE_GID" },
        { WINBINDD_GETUSERDOMGROUPS,     winbindd_dual_getuserdomgroups,      "GETUSERDOMGROUPS" },
        { WINBINDD_DUAL_GETSIDALIASES,   winbindd_dual_getsidaliases,         "GETSIDALIASES" },
+       { WINBINDD_CCACHE_NTLMAUTH,      winbindd_dual_ccache_ntlm_auth,      "CCACHE_NTLM_AUTH" },
        /* End of list */
 
        { WINBINDD_NUM_CMDS, NULL, "NONE" }
@@ -436,10 +521,14 @@ void winbind_child_died(pid_t pid)
        }
 
        if (child == NULL) {
-               DEBUG(0, ("Unknown child %d died!\n", pid));
+               DEBUG(5, ("Already reaped child %u died\n", (unsigned int)pid));
                return;
        }
 
+       /* This will be re-added in fork_domain_child() */
+
+       DLIST_REMOVE(children, child);
+       
        remove_fd_event(&child->event);
        close(child->event.fd);
        child->event.fd = 0;
@@ -449,10 +538,23 @@ void winbind_child_died(pid_t pid)
        schedule_async_request(child);
 }
 
-/* Forward the online/offline messages to our children. */
-void winbind_msg_offline(int msg_type, struct process_id src, void *buf, size_t len)
+/* Ensure any negative cache entries with the netbios or realm names are removed. */
+
+void winbindd_flush_negative_conn_cache(struct winbindd_domain *domain)
+{
+       flush_negative_conn_cache_for_domain(domain->name);
+       if (*domain->alt_name) {
+               flush_negative_conn_cache_for_domain(domain->alt_name);
+       }
+}
+
+/* Set our domains as offline and forward the offline message to our children. */
+
+void winbind_msg_offline(int msg_type, struct process_id src,
+                        void *buf, size_t len, void *private_data)
 {
        struct winbindd_child *child;
+       struct winbindd_domain *domain;
 
        DEBUG(10,("winbind_msg_offline: got offline message.\n"));
 
@@ -467,17 +569,60 @@ void winbind_msg_offline(int msg_type, struct process_id src, void *buf, size_t
                return;
        }
 
+       /* Set all our domains as offline. */
+       for (domain = domain_list(); domain; domain = domain->next) {
+               if (domain->internal) {
+                       continue;
+               }
+               DEBUG(5,("winbind_msg_offline: marking %s offline.\n", domain->name));
+               set_domain_offline(domain);
+
+               /* Send an offline message to the idmap child when our
+                  primary domain goes offline */
+
+               if ( domain->primary ) {
+                       struct winbindd_child *idmap = idmap_child();
+
+                       if ( idmap->pid != 0 ) {
+                               message_send_pid(pid_to_procid(idmap->pid), 
+                                                MSG_WINBIND_OFFLINE, 
+                                                domain->name, 
+                                                strlen(domain->name)+1, 
+                                                False);
+                       }                       
+               }
+       }
+
        for (child = children; child != NULL; child = child->next) {
-               DEBUG(10,("winbind_msg_offline: sending message to pid %u.\n",
-                       (unsigned int)child->pid ));
-               message_send_pid(pid_to_procid(child->pid), MSG_WINBIND_OFFLINE, NULL, 0, False);
+               /* Don't send message to idmap child.  We've already
+                  done so above. */
+               if (!child->domain || (child == idmap_child())) {
+                       continue;
+               }
+
+               /* Or internal domains (this should not be possible....) */
+               if (child->domain->internal) {
+                       continue;
+               }
+
+               /* Each winbindd child should only process requests for one domain - make sure
+                  we only set it online / offline for that domain. */
+
+               DEBUG(10,("winbind_msg_offline: sending message to pid %u for domain %s.\n",
+                       (unsigned int)child->pid, domain->name ));
+
+               message_send_pid(pid_to_procid(child->pid), MSG_WINBIND_OFFLINE, child->domain->name,
+                       strlen(child->domain->name)+1, False);
        }
 }
 
-/* Forward the online/offline messages to our children. */
-void winbind_msg_online(int msg_type, struct process_id src, void *buf, size_t len)
+/* Set our domains as online and forward the online message to our children. */
+
+void winbind_msg_online(int msg_type, struct process_id src,
+                       void *buf, size_t len, void *private_data)
 {
        struct winbindd_child *child;
+       struct winbindd_domain *domain;
 
        DEBUG(10,("winbind_msg_online: got online message.\n"));
 
@@ -489,38 +634,112 @@ void winbind_msg_online(int msg_type, struct process_id src, void *buf, size_t l
        /* Set our global state as online. */
        set_global_winbindd_state_online();
 
+       smb_nscd_flush_user_cache();
+       smb_nscd_flush_group_cache();
+
+       /* Set all our domains as online. */
+       for (domain = domain_list(); domain; domain = domain->next) {
+               if (domain->internal) {
+                       continue;
+               }
+               DEBUG(5,("winbind_msg_online: requesting %s to go online.\n", domain->name));
+
+               winbindd_flush_negative_conn_cache(domain);
+               set_domain_online_request(domain);
+
+               /* Send an online message to the idmap child when our
+                  primary domain comes back online */
+
+               if ( domain->primary ) {
+                       struct winbindd_child *idmap = idmap_child();
+                       
+                       if ( idmap->pid != 0 ) {
+                               message_send_pid(pid_to_procid(idmap->pid), 
+                                                MSG_WINBIND_ONLINE,
+                                                domain->name,
+                                                strlen(domain->name)+1, 
+                                                False);
+                       }
+                       
+               }
+       }
+
        for (child = children; child != NULL; child = child->next) {
-               DEBUG(10,("winbind_msg_online: sending message to pid %u.\n",
-                       (unsigned int)child->pid ));
-               message_send_pid(pid_to_procid(child->pid), MSG_WINBIND_ONLINE, NULL, 0, False);
+               /* Don't send message to idmap child. */
+               if (!child->domain || (child == idmap_child())) {
+                       continue;
+               }
+
+               /* Or internal domains (this should not be possible....) */
+               if (child->domain->internal) {
+                       continue;
+               }
+
+               /* Each winbindd child should only process requests for one domain - make sure
+                  we only set it online / offline for that domain. */
+
+               DEBUG(10,("winbind_msg_online: sending message to pid %u for domain %s.\n",
+                       (unsigned int)child->pid, child->domain->name ));
+
+               message_send_pid(pid_to_procid(child->pid), MSG_WINBIND_ONLINE, child->domain->name,
+                       strlen(child->domain->name)+1, False);
        }
 }
 
-static void account_lockout_policy_handler(struct timed_event *te,
+/* Forward the online/offline messages to our children. */
+void winbind_msg_onlinestatus(int msg_type, struct process_id src,
+                             void *buf, size_t len, void *private_data)
+{
+       struct winbindd_child *child;
+
+       DEBUG(10,("winbind_msg_onlinestatus: got onlinestatus message.\n"));
+
+       for (child = children; child != NULL; child = child->next) {
+               if (child->domain && child->domain->primary) {
+                       DEBUG(10,("winbind_msg_onlinestatus: "
+                                 "sending message to pid %u of primary domain.\n",
+                                 (unsigned int)child->pid));
+                       message_send_pid(pid_to_procid(child->pid), 
+                                        MSG_WINBIND_ONLINESTATUS, buf, len, False);
+                       break;
+               }
+       }
+}
+
+
+static void account_lockout_policy_handler(struct event_context *ctx,
+                                          struct timed_event *te,
                                           const struct timeval *now,
                                           void *private_data)
 {
-       struct winbindd_child *child = private_data;
-
+       struct winbindd_child *child =
+               (struct winbindd_child *)private_data;
+       TALLOC_CTX *mem_ctx = NULL;
        struct winbindd_methods *methods;
        SAM_UNK_INFO_12 lockout_policy;
        NTSTATUS result;
 
        DEBUG(10,("account_lockout_policy_handler called\n"));
 
-       if (child->lockout_policy_event) {
-               TALLOC_FREE(child->lockout_policy_event);
-       }
+       TALLOC_FREE(child->lockout_policy_event);
 
        methods = child->domain->methods;
 
-       result = methods->lockout_policy(child->domain, child->mem_ctx, &lockout_policy);
+       mem_ctx = talloc_init("account_lockout_policy_handler ctx");
+       if (!mem_ctx) {
+               result = NT_STATUS_NO_MEMORY;
+       } else {
+               result = methods->lockout_policy(child->domain, mem_ctx, &lockout_policy);
+       }
+
+       talloc_destroy(mem_ctx);
+
        if (!NT_STATUS_IS_OK(result)) {
-               DEBUG(10,("account_lockout_policy_handler: failed to call lockout_policy\n"));
-               return;
+               DEBUG(10,("account_lockout_policy_handler: lockout_policy failed error %s\n",
+                        nt_errstr(result)));
        }
 
-       child->lockout_policy_event = add_timed_event(child->mem_ctx, 
+       child->lockout_policy_event = event_add_timed(winbind_event_context(), NULL,
                                                      timeval_current_ofs(3600, 0),
                                                      "account_lockout_policy_handler",
                                                      account_lockout_policy_handler,
@@ -529,11 +748,17 @@ static void account_lockout_policy_handler(struct timed_event *te,
 
 /* Deal with a request to go offline. */
 
-static void child_msg_offline(int msg_type, struct process_id src, void *buf, size_t len)
+static void child_msg_offline(int msg_type, struct process_id src,
+                             void *buf, size_t len, void *private_data)
 {
        struct winbindd_domain *domain;
+       const char *domainname = (const char *)buf;
 
-       DEBUG(5,("child_msg_offline received.\n"));
+       if (buf == NULL || len == 0) {
+               return;
+       }
+
+       DEBUG(5,("child_msg_offline received for domain %s.\n", domainname));
 
        if (!lp_winbind_offline_logon()) {
                DEBUG(10,("child_msg_offline: rejecting offline message.\n"));
@@ -546,21 +771,32 @@ static void child_msg_offline(int msg_type, struct process_id src, void *buf, si
                return;
        }
 
-       /* Mark all our domains as offline. */
+       /* Mark the requested domain offline. */
 
        for (domain = domain_list(); domain; domain = domain->next) {
-               DEBUG(5,("child_msg_offline: marking %s offline.\n", domain->name));
-               domain->online = False;
+               if (domain->internal) {
+                       continue;
+               }
+               if (strequal(domain->name, domainname)) {
+                       DEBUG(5,("child_msg_offline: marking %s offline.\n", domain->name));
+                       set_domain_offline(domain);
+               }
        }
 }
 
 /* Deal with a request to go online. */
 
-static void child_msg_online(int msg_type, struct process_id src, void *buf, size_t len)
+static void child_msg_online(int msg_type, struct process_id src,
+                            void *buf, size_t len, void *private_data)
 {
        struct winbindd_domain *domain;
+       const char *domainname = (const char *)buf;
+
+       if (buf == NULL || len == 0) {
+               return;
+       }
 
-       DEBUG(5,("child_msg_online received.\n"));
+       DEBUG(5,("child_msg_online received for domain %s.\n", domainname));
 
        if (!lp_winbind_offline_logon()) {
                DEBUG(10,("child_msg_online: rejecting online message.\n"));
@@ -570,23 +806,86 @@ static void child_msg_online(int msg_type, struct process_id src, void *buf, siz
        /* Set our global state as online. */
        set_global_winbindd_state_online();
 
-       winbindd_flush_nscd_cache();
+       /* Try and mark everything online - delete any negative cache entries
+          to force a reconnect now. */
+
+       for (domain = domain_list(); domain; domain = domain->next) {
+               if (domain->internal) {
+                       continue;
+               }
+               if (strequal(domain->name, domainname)) {
+                       DEBUG(5,("child_msg_online: requesting %s to go online.\n", domain->name));
+                       winbindd_flush_negative_conn_cache(domain);
+                       set_domain_online_request(domain);
+               }
+       }
+}
+
+static const char *collect_onlinestatus(TALLOC_CTX *mem_ctx)
+{
+       struct winbindd_domain *domain;
+       char *buf = NULL;
 
-       /* Mark everything online - delete any negative cache entries
-          to force an immediate reconnect. */
+       if ((buf = talloc_asprintf(mem_ctx, "global:%s ", 
+                                  get_global_winbindd_state_offline() ? 
+                                  "Offline":"Online")) == NULL) {
+               return NULL;
+       }
 
        for (domain = domain_list(); domain; domain = domain->next) {
-               DEBUG(5,("child_msg_online: marking %s online.\n", domain->name));
-               domain->online = True;
-               check_negative_conn_cache_timeout(domain->name, domain->dcname, 0);
+               if ((buf = talloc_asprintf_append(buf, "%s:%s ", 
+                                                 domain->name, 
+                                                 domain->online ?
+                                                 "Online":"Offline")) == NULL) {
+                       return NULL;
+               }
        }
+
+       buf = talloc_asprintf_append(buf, "\n");
+
+       DEBUG(5,("collect_onlinestatus: %s", buf));
+
+       return buf;
+}
+
+static void child_msg_onlinestatus(int msg_type, struct process_id src,
+                                  void *buf, size_t len, void *private_data)
+{
+       TALLOC_CTX *mem_ctx;
+       const char *message;
+       struct process_id *sender;
+       
+       DEBUG(5,("winbind_msg_onlinestatus received.\n"));
+
+       if (!buf) {
+               return;
+       }
+
+       sender = (struct process_id *)buf;
+
+       mem_ctx = talloc_init("winbind_msg_onlinestatus");
+       if (mem_ctx == NULL) {
+               return;
+       }
+       
+       message = collect_onlinestatus(mem_ctx);
+       if (message == NULL) {
+               talloc_destroy(mem_ctx);
+               return;
+       }
+
+       message_send_pid(*sender, MSG_WINBIND_ONLINESTATUS, 
+                        message, strlen(message) + 1, True);
+
+       talloc_destroy(mem_ctx);
 }
 
 static BOOL fork_domain_child(struct winbindd_child *child)
 {
        int fdpair[2];
        struct winbindd_cli_state state;
-       extern BOOL override_logfile;
+       struct winbindd_domain *domain;
+       struct winbindd_domain *primary_domain = NULL;
 
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) != 0) {
                DEBUG(0, ("Could not open child pipe: %s\n",
@@ -595,7 +894,7 @@ static BOOL fork_domain_child(struct winbindd_child *child)
        }
 
        ZERO_STRUCT(state);
-       state.pid = getpid();
+       state.pid = sys_getpid();
 
        /* Ensure we don't process messages whilst we're
           changing the disposition for the child. */
@@ -625,6 +924,9 @@ static BOOL fork_domain_child(struct winbindd_child *child)
 
        /* Child */
 
+       /* Stop zombies in children */
+       CatchChild();
+
        state.sock = fdpair[0];
        close(fdpair[1]);
 
@@ -646,28 +948,67 @@ static BOOL fork_domain_child(struct winbindd_child *child)
        message_deregister(MSG_SHUTDOWN);
        message_deregister(MSG_WINBIND_OFFLINE);
        message_deregister(MSG_WINBIND_ONLINE);
+       message_deregister(MSG_WINBIND_ONLINESTATUS);
 
        /* The child is ok with online/offline messages now. */
        message_unblock();
 
-       child->mem_ctx = talloc_init("child_mem_ctx");
-       if (child->mem_ctx == NULL) {
-               return False;
+       /* Handle online/offline messages. */
+       message_register(MSG_WINBIND_OFFLINE, child_msg_offline, NULL);
+       message_register(MSG_WINBIND_ONLINE, child_msg_online, NULL);
+       message_register(MSG_WINBIND_ONLINESTATUS, child_msg_onlinestatus,
+                        NULL);
+
+       if ( child->domain ) {
+               child->domain->startup = True;
+               child->domain->startup_time = time(NULL);
+       }
+
+       /* Ensure we have no pending check_online events other
+          than one for this domain or the primary domain. */
+
+       for (domain = domain_list(); domain; domain = domain->next) {
+               if (domain->primary) {
+                       primary_domain = domain;
+               }
+               if ((domain != child->domain) && !domain->primary) {
+                       TALLOC_FREE(domain->check_online_event);
+               }
        }
 
-       if (child->domain != NULL) {
-               /* We might be in the idmap child...*/
-               child->lockout_policy_event = add_timed_event(
-                       child->mem_ctx, timeval_zero(),
+       /* Ensure we're not handling an event inherited from
+          our parent. */
+
+       cancel_named_event(winbind_event_context(),
+                          "krb5_ticket_refresh_handler");
+
+       /* We might be in the idmap child...*/
+       if (child->domain && !(child->domain->internal) &&
+           lp_winbind_offline_logon()) {
+
+               set_domain_online_request(child->domain);
+
+               if (primary_domain != child->domain) {
+                       /* We need to talk to the primary
+                        * domain as well as the trusted
+                        * domain inside a trusted domain
+                        * child.
+                        * See the code in :
+                        * winbindd_dual_pam_auth_samlogon()
+                        * especially the calling of 
+                        * contact_domain = find_our_domain()
+                        * in the non-DC case for details.
+                        */
+                       set_domain_online_request(primary_domain);
+               }
+
+               child->lockout_policy_event = event_add_timed(
+                       winbind_event_context(), NULL, timeval_zero(),
                        "account_lockout_policy_handler",
                        account_lockout_policy_handler,
                        child);
        }
 
-       /* Handle online/offline messages. */
-       message_register(MSG_WINBIND_OFFLINE,child_msg_offline);
-       message_register(MSG_WINBIND_ONLINE,child_msg_online);
-
        while (1) {
 
                int ret;
@@ -680,13 +1021,27 @@ static BOOL fork_domain_child(struct winbindd_child *child)
                lp_TALLOC_FREE();
                main_loop_TALLOC_FREE();
 
-               run_events();
+               /* check for signals */
+               winbind_check_sigterm(false);
+               winbind_check_sighup(override_logfile ? NULL :
+                       child->logfilename);
+
+               run_events(winbind_event_context(), 0, NULL, NULL);
 
                GetTimeOfDay(&now);
 
-               tp = get_timed_events_timeout(&t, (time_t)-1);
+               if (child->domain && child->domain->startup &&
+                               (now.tv_sec > child->domain->startup_time + 30)) {
+                       /* No longer in "startup" mode. */
+                       DEBUG(10,("fork_domain_child: domain %s no longer in 'startup' mode.\n",
+                               child->domain->name ));
+                       child->domain->startup = False;
+               }
+
+               tp = get_timed_events_timeout(winbind_event_context(), &t);
                if (tp) {
-                       DEBUG(11,("select will use timeout of %d seconds\n", (int)tp->tv_sec));
+                       DEBUG(11,("select will use timeout of %u.%u seconds\n",
+                               (unsigned int)tp->tv_sec, (unsigned int)tp->tv_usec ));
                }
 
                /* Handle messages */
@@ -728,17 +1083,18 @@ static BOOL fork_domain_child(struct winbindd_child *child)
                state.request.null_term = '\0';
                child_process_request(child->domain, &state);
 
-               SAFE_FREE(state.request.extra_data);
+               SAFE_FREE(state.request.extra_data.data);
 
                cache_store_response(sys_getpid(), &state.response);
 
-               SAFE_FREE(state.response.extra_data);
+               SAFE_FREE(state.response.extra_data.data);
 
                /* We just send the result code back, the result
                 * structure needs to be fetched via the
                 * winbindd_cache. Hmm. That needs fixing... */
 
-               if (write_data(state.sock, (void *)&state.response.result,
+               if (write_data(state.sock,
+                              (const char *)&state.response.result,
                               sizeof(state.response.result)) !=
                    sizeof(state.response.result)) {
                        DEBUG(0, ("Could not write result\n"));