Avoid dlclose(RTLD_NEXT)
[socket_wrapper.git] / src / socket_wrapper.c
index 089f5d9dcc26ee206ab4946a9ae91a3cba3d047a..aefd526ddfe633f4e158138534a0d0b8d6ea3095 100644 (file)
@@ -2,8 +2,8 @@
  * BSD 3-Clause License
  *
  * Copyright (c) 2005-2008, Jelmer Vernooij <jelmer@samba.org>
- * Copyright (c) 2006-2018, Stefan Metzmacher <metze@samba.org>
- * Copyright (c) 2013-2018, Andreas Schneider <asn@samba.org>
+ * Copyright (c) 2006-2021, Stefan Metzmacher <metze@samba.org>
+ * Copyright (c) 2013-2021, Andreas Schneider <asn@samba.org>
  * Copyright (c) 2014-2017, Michael Adam <obnox@samba.org>
  * Copyright (c) 2016-2018, Anoop C S <anoopcs@redhat.com>
  * All rights reserved.
@@ -86,6 +86,8 @@
 #endif
 #include <pthread.h>
 
+#include "socket_wrapper.h"
+
 enum swrap_dbglvl_e {
        SWRAP_LOG_ERROR = 0,
        SWRAP_LOG_WARN,
@@ -183,7 +185,6 @@ enum swrap_dbglvl_e {
 
 /* Add new global locks here please */
 # define SWRAP_REINIT_ALL do { \
-       size_t __i; \
        int ret; \
        ret = socket_wrapper_init_mutex(&sockets_mutex); \
        if (ret != 0) exit(-1); \
@@ -191,10 +192,8 @@ enum swrap_dbglvl_e {
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&first_free_mutex); \
        if (ret != 0) exit(-1); \
-       for (__i = 0; (sockets != NULL) && __i < socket_info_max; __i++) { \
-               ret = socket_wrapper_init_mutex(&sockets[__i].meta.mutex); \
-               if (ret != 0) exit(-1); \
-       } \
+       ret = socket_wrapper_init_mutex(&sockets_si_global); \
+       if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&autobind_start_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&pcap_dump_mutex); \
@@ -204,27 +203,20 @@ enum swrap_dbglvl_e {
 } while(0)
 
 # define SWRAP_LOCK_ALL do { \
-       size_t __i; \
        swrap_mutex_lock(&sockets_mutex); \
        swrap_mutex_lock(&socket_reset_mutex); \
        swrap_mutex_lock(&first_free_mutex); \
-       for (__i = 0; (sockets != NULL) && __i < socket_info_max; __i++) { \
-               swrap_mutex_lock(&sockets[__i].meta.mutex); \
-       } \
+       swrap_mutex_lock(&sockets_si_global); \
        swrap_mutex_lock(&autobind_start_mutex); \
        swrap_mutex_lock(&pcap_dump_mutex); \
        swrap_mutex_lock(&mtu_update_mutex); \
 } while(0)
 
 # define SWRAP_UNLOCK_ALL do { \
-       size_t __s; \
        swrap_mutex_unlock(&mtu_update_mutex); \
        swrap_mutex_unlock(&pcap_dump_mutex); \
        swrap_mutex_unlock(&autobind_start_mutex); \
-       for (__s = 0; (sockets != NULL) && __s < socket_info_max; __s++) { \
-               size_t __i = (socket_info_max - 1) - __s; \
-               swrap_mutex_unlock(&sockets[__i].meta.mutex); \
-       } \
+       swrap_mutex_unlock(&sockets_si_global); \
        swrap_mutex_unlock(&first_free_mutex); \
        swrap_mutex_unlock(&socket_reset_mutex); \
        swrap_mutex_unlock(&sockets_mutex); \
@@ -235,12 +227,20 @@ enum swrap_dbglvl_e {
 
 #define SWRAP_LOCK_SI(si) do { \
        struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
-       swrap_mutex_lock(&sic->meta.mutex); \
+       if (sic != NULL) { \
+               swrap_mutex_lock(&sockets_si_global); \
+       } else { \
+               abort(); \
+       } \
 } while(0)
 
 #define SWRAP_UNLOCK_SI(si) do { \
        struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
-       swrap_mutex_unlock(&sic->meta.mutex); \
+       if (sic != NULL) { \
+               swrap_mutex_unlock(&sockets_si_global); \
+       } else { \
+               abort(); \
+       } \
 } while(0)
 
 #if defined(HAVE_GETTIMEOFDAY_TZ) || defined(HAVE_GETTIMEOFDAY_TZ_VOID)
@@ -302,6 +302,11 @@ static int first_free;
 
 struct socket_info
 {
+       /*
+        * Remember to update swrap_unix_scm_right_magic
+        * on any change.
+        */
+
        int family;
        int type;
        int protocol;
@@ -313,6 +318,7 @@ struct socket_info
        int pktinfo;
        int tcp_nodelay;
        int listening;
+       int fd_passed;
 
        /* The unix path so we can unlink it on close() */
        struct sockaddr_un un_addr;
@@ -331,7 +337,13 @@ struct socket_info_meta
 {
        unsigned int refcount;
        int next_free;
-       pthread_mutex_t mutex;
+       /*
+        * As long as we don't use shared memory
+        * for the sockets array, we use
+        * sockets_si_global as a single mutex.
+        *
+        * pthread_mutex_t mutex;
+        */
 };
 
 struct socket_info_container
@@ -360,12 +372,20 @@ static pthread_mutex_t autobind_start_mutex = PTHREAD_MUTEX_INITIALIZER;
 /* Mutex to guard the initialization of array of socket_info structures */
 static pthread_mutex_t sockets_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-/* Mutex to guard the socket reset in swrap_close() and swrap_remove_stale() */
+/* Mutex to guard the socket reset in swrap_remove_wrapper() */
 static pthread_mutex_t socket_reset_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 /* Mutex to synchronize access to first free index in socket_info array */
 static pthread_mutex_t first_free_mutex = PTHREAD_MUTEX_INITIALIZER;
 
+/*
+ * Mutex to synchronize access to socket_info structures.
+ * We use a single global mutex in order to avoid leaking
+ * ~ 38M copy on write memory per fork.
+ * max_sockets=65535 * sizeof(struct socket_info_container)=592 = 38796720
+ */
+static pthread_mutex_t sockets_si_global = PTHREAD_MUTEX_INITIALIZER;
+
 /* Mutex to synchronize access to packet capture dump file */
 static pthread_mutex_t pcap_dump_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -374,8 +394,6 @@ static pthread_mutex_t mtu_update_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 /* Function prototypes */
 
-bool socket_wrapper_enabled(void);
-
 #if ! defined(HAVE_CONSTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_INIT)
 /* xlC and other oldschool compilers support (only) this */
 #pragma init (swrap_constructor)
@@ -474,6 +492,9 @@ typedef int (*__libc_bind)(int sockfd,
                           const struct sockaddr *addr,
                           socklen_t addrlen);
 typedef int (*__libc_close)(int fd);
+#ifdef HAVE___CLOSE_NOCANCEL
+typedef int (*__libc___close_nocancel)(int fd);
+#endif
 typedef int (*__libc_connect)(int sockfd,
                              const struct sockaddr *addr,
                              socklen_t addrlen);
@@ -554,6 +575,9 @@ struct swrap_libc_symbols {
 #endif
        SWRAP_SYMBOL_ENTRY(bind);
        SWRAP_SYMBOL_ENTRY(close);
+#ifdef HAVE___CLOSE_NOCANCEL
+       SWRAP_SYMBOL_ENTRY(__close_nocancel);
+#endif
        SWRAP_SYMBOL_ENTRY(connect);
        SWRAP_SYMBOL_ENTRY(dup);
        SWRAP_SYMBOL_ENTRY(dup2);
@@ -749,6 +773,7 @@ static void _swrap_mutex_lock(pthread_mutex_t *mutex, const char *name, const ch
        if (ret != 0) {
                SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't lock pthread mutex(%s) - %s",
                          getpid(), getppid(), caller, line, name, strerror(ret));
+               abort();
        }
 }
 
@@ -761,6 +786,7 @@ static void _swrap_mutex_unlock(pthread_mutex_t *mutex, const char *name, const
        if (ret != 0) {
                SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't unlock pthread mutex(%s) - %s",
                          getpid(), getppid(), caller, line, name, strerror(ret));
+               abort();
        }
 }
 
@@ -831,6 +857,15 @@ static int libc_close(int fd)
        return swrap.libc.symbols._libc_close.f(fd);
 }
 
+#ifdef HAVE___CLOSE_NOCANCEL
+static int libc___close_nocancel(int fd)
+{
+       swrap_bind_symbol_all();
+
+       return swrap.libc.symbols._libc___close_nocancel.f(fd);
+}
+#endif /* HAVE___CLOSE_NOCANCEL */
+
 static int libc_connect(int sockfd,
                        const struct sockaddr *addr,
                        socklen_t addrlen)
@@ -1179,6 +1214,9 @@ static void __swrap_bind_symbol_all_once(void)
 #endif
        swrap_bind_symbol_libsocket(bind);
        swrap_bind_symbol_libc(close);
+#ifdef HAVE___CLOSE_NOCANCEL
+       swrap_bind_symbol_libc(__close_nocancel);
+#endif
        swrap_bind_symbol_libsocket(connect);
        swrap_bind_symbol_libc(dup);
        swrap_bind_symbol_libc(dup2);
@@ -1699,27 +1737,18 @@ static void socket_wrapper_init_sockets(void)
        }
 
        swrap_mutex_lock(&first_free_mutex);
+       swrap_mutex_lock(&sockets_si_global);
 
        first_free = 0;
 
        for (i = 0; i < max_sockets; i++) {
                swrap_set_next_free(&sockets[i].info, i+1);
-               sockets[i].meta.mutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
-       }
-
-       for (i = 0; i < max_sockets; i++) {
-               ret = socket_wrapper_init_mutex(&sockets[i].meta.mutex);
-               if (ret != 0) {
-                       SWRAP_LOG(SWRAP_LOG_ERROR,
-                                 "Failed to initialize pthread mutex i=%zu", i);
-                       goto done;
-               }
        }
 
        /* mark the end of the free list */
        swrap_set_next_free(&sockets[max_sockets-1].info, -1);
 
-done:
+       swrap_mutex_unlock(&sockets_si_global);
        swrap_mutex_unlock(&first_free_mutex);
        swrap_mutex_unlock(&sockets_mutex);
        if (ret != 0) {
@@ -1811,7 +1840,7 @@ static int find_socket_info_index(int fd)
        return socket_fds_idx[fd];
 }
 
-static int swrap_add_socket_info(struct socket_info *si_input)
+static int swrap_add_socket_info(const struct socket_info *si_input)
 {
        struct socket_info *si = NULL;
        int si_index = -1;
@@ -1854,6 +1883,7 @@ static int swrap_create_socket(struct socket_info *si, int fd)
                          "trying to add %d",
                          socket_fds_max,
                          fd);
+               errno = EMFILE;
                return -1;
        }
 
@@ -1878,31 +1908,40 @@ static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, sock
        if (p) p++; else p = un->sun_path;
 
        if (sscanf(p, SOCKET_FORMAT, &type, &iface, &prt) != 3) {
+               SWRAP_LOG(SWRAP_LOG_ERROR, "sun_path[%s] p[%s]",
+                         un->sun_path, p);
                errno = EINVAL;
                return -1;
        }
 
-       SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u",
-                       type, iface, prt);
-
        if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
+               SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
+                         type, iface, prt);
                errno = EINVAL;
                return -1;
        }
 
        if (prt > 0xFFFF) {
+               SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
+                         type, iface, prt);
                errno = EINVAL;
                return -1;
        }
 
+       SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u",
+                 type, iface, prt);
+
        switch(type) {
        case SOCKET_TYPE_CHAR_TCP:
        case SOCKET_TYPE_CHAR_UDP: {
                struct sockaddr_in *in2 = (struct sockaddr_in *)(void *)in;
 
                if ((*len) < sizeof(*in2)) {
-                   errno = EINVAL;
-                   return -1;
+                       SWRAP_LOG(SWRAP_LOG_ERROR,
+                                 "V4: *len(%zu) < sizeof(*in2)=%zu",
+                                 (size_t)*len, sizeof(*in2));
+                       errno = EINVAL;
+                       return -1;
                }
 
                memset(in2, 0, sizeof(*in2));
@@ -1919,6 +1958,10 @@ static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, sock
                struct sockaddr_in6 *in2 = (struct sockaddr_in6 *)(void *)in;
 
                if ((*len) < sizeof(*in2)) {
+                       SWRAP_LOG(SWRAP_LOG_ERROR,
+                                 "V6: *len(%zu) < sizeof(*in2)=%zu",
+                                 (size_t)*len, sizeof(*in2));
+                       SWRAP_LOG(SWRAP_LOG_ERROR, "LINE:%d", __LINE__);
                        errno = EINVAL;
                        return -1;
                }
@@ -1934,6 +1977,8 @@ static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, sock
        }
 #endif
        default:
+               SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
+                         type, iface, prt);
                errno = EINVAL;
                return -1;
        }
@@ -2000,6 +2045,13 @@ static int convert_in_un_remote(struct socket_info *si, const struct sockaddr *i
                        type = u_type;
                        iface = (addr & 0x000000FF);
                } else {
+                       char str[256] = {0,};
+                       inet_ntop(inaddr->sa_family,
+                                 &in->sin_addr,
+                                 str, sizeof(str));
+                       SWRAP_LOG(SWRAP_LOG_WARN,
+                                 "str[%s] prt[%u]",
+                                 str, (unsigned)prt);
                        errno = ENETUNREACH;
                        return -1;
                }
@@ -2035,6 +2087,13 @@ static int convert_in_un_remote(struct socket_info *si, const struct sockaddr *i
                if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
                        iface = in->sin6_addr.s6_addr[15];
                } else {
+                       char str[256] = {0,};
+                       inet_ntop(inaddr->sa_family,
+                                 &in->sin6_addr,
+                                 str, sizeof(str));
+                       SWRAP_LOG(SWRAP_LOG_WARN,
+                                 "str[%s] prt[%u]",
+                                 str, (unsigned)prt);
                        errno = ENETUNREACH;
                        return -1;
                }
@@ -2363,46 +2422,7 @@ static bool check_addr_port_in_use(const struct sockaddr *sa, socklen_t len)
 }
 #endif
 
-static void swrap_remove_stale(int fd)
-{
-       struct socket_info *si;
-       int si_index;
-
-       SWRAP_LOG(SWRAP_LOG_TRACE, "remove stale wrapper for %d", fd);
-
-       swrap_mutex_lock(&socket_reset_mutex);
-
-       si_index = find_socket_info_index(fd);
-       if (si_index == -1) {
-               swrap_mutex_unlock(&socket_reset_mutex);
-               return;
-       }
-
-       reset_socket_info_index(fd);
-
-       si = swrap_get_socket_info(si_index);
-
-       swrap_mutex_lock(&first_free_mutex);
-       SWRAP_LOCK_SI(si);
-
-       swrap_dec_refcount(si);
-
-       if (swrap_get_refcount(si) > 0) {
-               goto out;
-       }
-
-       if (si->un_addr.sun_path[0] != '\0') {
-               unlink(si->un_addr.sun_path);
-       }
-
-       swrap_set_next_free(si, first_free);
-       first_free = si_index;
-
-out:
-       SWRAP_UNLOCK_SI(si);
-       swrap_mutex_unlock(&first_free_mutex);
-       swrap_mutex_unlock(&socket_reset_mutex);
-}
+static void swrap_remove_stale(int fd);
 
 static int sockaddr_convert_to_un(struct socket_info *si,
                                  const struct sockaddr *in_addr,
@@ -2963,8 +2983,8 @@ static int swrap_pcap_get_fd(const char *fname)
                file_hdr.frame_max_len  = SWRAP_FRAME_LENGTH_MAX;
                file_hdr.link_type      = 0x0065; /* 101 RAW IP */
 
-               if (write(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
-                       close(fd);
+               if (libc_write(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
+                       libc_close(fd);
                        fd = -1;
                }
                return fd;
@@ -3298,7 +3318,7 @@ static void swrap_pcap_dump_packet(struct socket_info *si,
 
        fd = swrap_pcap_get_fd(file_name);
        if (fd != -1) {
-               if (write(fd, packet, packet_len) != (ssize_t)packet_len) {
+               if (libc_write(fd, packet, packet_len) != (ssize_t)packet_len) {
                        free(packet);
                        goto done;
                }
@@ -3469,6 +3489,9 @@ static int swrap_socket(int family, int type, int protocol)
 
        ret = swrap_create_socket(si, fd);
        if (ret == -1) {
+               int saved_errno = errno;
+               libc_close(fd);
+               errno = saved_errno;
                return -1;
        }
 
@@ -3617,10 +3640,12 @@ static int swrap_accept(int s,
        ret = libc_accept(s, &un_addr.sa.s, &un_addr.sa_socklen);
 #endif
        if (ret == -1) {
-               if (errno == ENOTSOCK) {
+               int saved_errno = errno;
+               if (saved_errno == ENOTSOCK) {
                        /* Remove stale fds */
                        swrap_remove_stale(s);
                }
+               errno = saved_errno;
                return ret;
        }
 
@@ -3629,6 +3654,50 @@ static int swrap_accept(int s,
        /* Check if we have a stale fd and remove it */
        swrap_remove_stale(fd);
 
+       if (un_addr.sa.un.sun_path[0] == '\0') {
+               /*
+                * FreeBSD seems to have a problem where
+                * accept4() on the unix socket doesn't return
+                * ECONNABORTED for already disconnected connections.
+                *
+                * Let's try libc_getpeername() to get the peer address
+                * as a fallback, but it'll likely return ENOTCONN,
+                * which we have to map to ECONNABORTED.
+                */
+               un_addr.sa_socklen = sizeof(struct sockaddr_un),
+               ret = libc_getpeername(fd, &un_addr.sa.s, &un_addr.sa_socklen);
+               if (ret == -1) {
+                       int saved_errno = errno;
+                       libc_close(fd);
+                       if (saved_errno == ENOTCONN) {
+                               /*
+                                * If the connection is already disconnected
+                                * we should return ECONNABORTED.
+                                */
+                               saved_errno = ECONNABORTED;
+                       }
+                       errno = saved_errno;
+                       return ret;
+               }
+       }
+
+       ret = libc_getsockname(fd,
+                              &un_my_addr.sa.s,
+                              &un_my_addr.sa_socklen);
+       if (ret == -1) {
+               int saved_errno = errno;
+               libc_close(fd);
+               if (saved_errno == ENOTCONN) {
+                       /*
+                        * If the connection is already disconnected
+                        * we should return ECONNABORTED.
+                        */
+                       saved_errno = ECONNABORTED;
+               }
+               errno = saved_errno;
+               return ret;
+       }
+
        SWRAP_LOCK_SI(parent_si);
 
        ret = sockaddr_convert_from_un(parent_si,
@@ -3638,8 +3707,10 @@ static int swrap_accept(int s,
                                       &in_addr.sa.s,
                                       &in_addr.sa_socklen);
        if (ret == -1) {
+               int saved_errno = errno;
                SWRAP_UNLOCK_SI(parent_si);
-               close(fd);
+               libc_close(fd);
+               errno = saved_errno;
                return ret;
        }
 
@@ -3667,14 +3738,6 @@ static int swrap_accept(int s,
                *addrlen = in_addr.sa_socklen;
        }
 
-       ret = libc_getsockname(fd,
-                              &un_my_addr.sa.s,
-                              &un_my_addr.sa_socklen);
-       if (ret == -1) {
-               close(fd);
-               return ret;
-       }
-
        ret = sockaddr_convert_from_un(child_si,
                                       &un_my_addr.sa.un,
                                       un_my_addr.sa_socklen,
@@ -3682,7 +3745,9 @@ static int swrap_accept(int s,
                                       &in_my_addr.sa.s,
                                       &in_my_addr.sa_socklen);
        if (ret == -1) {
-               close(fd);
+               int saved_errno = errno;
+               libc_close(fd);
+               errno = saved_errno;
                return ret;
        }
 
@@ -3697,7 +3762,9 @@ static int swrap_accept(int s,
 
        idx = swrap_create_socket(&new_si, fd);
        if (idx == -1) {
-               close (fd);
+               int saved_errno = errno;
+               libc_close(fd);
+               errno = saved_errno;
                return -1;
        }
 
@@ -3748,7 +3815,6 @@ static int swrap_auto_bind(int fd, struct socket_info *si, int family)
        char type;
        int ret;
        int port;
-       struct stat st;
        char *swrap_dir = NULL;
 
        swrap_mutex_lock(&autobind_start_mutex);
@@ -3849,10 +3915,12 @@ static int swrap_auto_bind(int fd, struct socket_info *si, int family)
                              type,
                              socket_wrapper_default_iface(),
                              port);
-               if (stat(un_addr.sa.un.sun_path, &st) == 0) continue;
 
                ret = libc_bind(fd, &un_addr.sa.s, un_addr.sa_socklen);
                if (ret == -1) {
+                       if (errno == EALREADY || errno == EADDRINUSE) {
+                               continue;
+                       }
                        goto done;
                }
 
@@ -5146,118 +5214,992 @@ static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
        return rc;
 }
 
-#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+static const uint64_t swrap_unix_scm_right_magic = 0x8e0e13f27c42fc36;
 
-static ssize_t swrap_sendmsg_before(int fd,
-                                   struct socket_info *si,
-                                   struct msghdr *msg,
-                                   struct iovec *tmp_iov,
-                                   struct sockaddr_un *tmp_un,
-                                   const struct sockaddr_un **to_un,
-                                   const struct sockaddr **to,
-                                   int *bcast)
+/*
+ * We only allow up to 6 fds at a time
+ * as that's more than enough for Samba
+ * and it means we can keep the logic simple
+ * and work with fixed size arrays.
+ *
+ * We also keep sizeof(struct swrap_unix_scm_rights)
+ * under PIPE_BUF (4096) in order to allow a non-blocking
+ * write into the pipe.
+ */
+#ifndef PIPE_BUF
+#define PIPE_BUF 4096
+#endif
+#define SWRAP_MAX_PASSED_FDS ((size_t)6)
+#define SWRAP_MAX_PASSED_SOCKET_INFO SWRAP_MAX_PASSED_FDS
+struct swrap_unix_scm_rights_payload {
+       uint8_t num_idxs;
+       int8_t idxs[SWRAP_MAX_PASSED_FDS];
+       struct socket_info infos[SWRAP_MAX_PASSED_SOCKET_INFO];
+};
+struct swrap_unix_scm_rights {
+       uint64_t magic;
+       char package_name[sizeof(SOCKET_WRAPPER_PACKAGE)];
+       char package_version[sizeof(SOCKET_WRAPPER_VERSION)];
+       uint32_t full_size;
+       uint32_t payload_size;
+       struct swrap_unix_scm_rights_payload payload;
+};
+
+static void swrap_dec_fd_passed_array(size_t num, struct socket_info **array)
 {
-       size_t i, len = 0;
-       ssize_t ret = -1;
+       int saved_errno = errno;
+       size_t i;
 
-       if (to_un) {
-               *to_un = NULL;
-       }
-       if (to) {
-               *to = NULL;
-       }
-       if (bcast) {
-               *bcast = 0;
+       for (i = 0; i < num; i++) {
+               struct socket_info *si = array[i];
+               if (si == NULL) {
+                       continue;
+               }
+
+               SWRAP_LOCK_SI(si);
+               swrap_dec_refcount(si);
+               if (si->fd_passed > 0) {
+                       si->fd_passed -= 1;
+               }
+               SWRAP_UNLOCK_SI(si);
+               array[i] = NULL;
        }
 
-       SWRAP_LOCK_SI(si);
+       errno = saved_errno;
+}
 
-       switch (si->type) {
-       case SOCK_STREAM: {
-               unsigned long mtu;
+static void swrap_undo_si_idx_array(size_t num, int *array)
+{
+       int saved_errno = errno;
+       size_t i;
 
-               if (!si->connected) {
-                       errno = ENOTCONN;
-                       goto out;
-               }
+       swrap_mutex_lock(&first_free_mutex);
 
-               if (msg->msg_iovlen == 0) {
-                       break;
-               }
+       for (i = 0; i < num; i++) {
+               struct socket_info *si = NULL;
 
-               mtu = socket_wrapper_mtu();
-               for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
-                       size_t nlen;
-                       nlen = len + msg->msg_iov[i].iov_len;
-                       if (nlen < len) {
-                               /* overflow */
-                               errno = EMSGSIZE;
-                               goto out;
-                       }
-                       if (nlen > mtu) {
-                               break;
-                       }
+               if (array[i] == -1) {
+                       continue;
                }
-               msg->msg_iovlen = i;
-               if (msg->msg_iovlen == 0) {
-                       *tmp_iov = msg->msg_iov[0];
-                       tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
-                                              (size_t)mtu);
-                       msg->msg_iov = tmp_iov;
-                       msg->msg_iovlen = 1;
+
+               si = swrap_get_socket_info(array[i]);
+               if (si == NULL) {
+                       continue;
                }
-               break;
-       }
-       case SOCK_DGRAM:
-               if (si->connected) {
-                       if (msg->msg_name != NULL) {
-                               /*
-                                * We are dealing with unix sockets and if we
-                                * are connected, we should only talk to the
-                                * connected unix path. Using the fd to send
-                                * to another server would be hard to achieve.
-                                */
-                               msg->msg_name = NULL;
-                               msg->msg_namelen = 0;
-                       }
-               } else {
-                       const struct sockaddr *msg_name;
-                       msg_name = (const struct sockaddr *)msg->msg_name;
 
-                       if (msg_name == NULL) {
-                               errno = ENOTCONN;
-                               goto out;
-                       }
+               SWRAP_LOCK_SI(si);
+               swrap_dec_refcount(si);
+               SWRAP_UNLOCK_SI(si);
 
+               swrap_set_next_free(si, first_free);
+               first_free = array[i];
+               array[i] = -1;
+       }
 
-                       ret = sockaddr_convert_to_un(si, msg_name, msg->msg_namelen,
-                                                    tmp_un, 0, bcast);
-                       if (ret == -1) {
-                               goto out;
-                       }
+       swrap_mutex_unlock(&first_free_mutex);
+       errno = saved_errno;
+}
 
-                       if (to_un) {
-                               *to_un = tmp_un;
-                       }
-                       if (to) {
-                               *to = msg_name;
-                       }
-                       msg->msg_name = tmp_un;
-                       msg->msg_namelen = sizeof(*tmp_un);
-               }
+static void swrap_close_fd_array(size_t num, const int *array)
+{
+       int saved_errno = errno;
+       size_t i;
 
-               if (si->bound == 0) {
-                       ret = swrap_auto_bind(fd, si, si->family);
-                       if (ret == -1) {
-                               SWRAP_UNLOCK_SI(si);
-                               if (errno == ENOTSOCK) {
-                                       swrap_remove_stale(fd);
-                                       ret = -ENOTSOCK;
-                               } else {
-                                       SWRAP_LOG(SWRAP_LOG_ERROR, "swrap_sendmsg_before failed");
-                               }
-                               return ret;
-                       }
+       for (i = 0; i < num; i++) {
+               if (array[i] == -1) {
+                       continue;
+               }
+               libc_close(array[i]);
+       }
+
+       errno = saved_errno;
+}
+
+union __swrap_fds {
+       const uint8_t *p;
+       int *fds;
+};
+
+union __swrap_cmsghdr {
+       const uint8_t *p;
+       struct cmsghdr *cmsg;
+};
+
+static int swrap_sendmsg_unix_scm_rights(struct cmsghdr *cmsg,
+                                        uint8_t **cm_data,
+                                        size_t *cm_data_space,
+                                        int *scm_rights_pipe_fd)
+{
+       struct swrap_unix_scm_rights info;
+       struct swrap_unix_scm_rights_payload *payload = NULL;
+       int si_idx_array[SWRAP_MAX_PASSED_FDS];
+       struct socket_info *si_array[SWRAP_MAX_PASSED_FDS] = { NULL, };
+       size_t info_idx = 0;
+       size_t size_fds_in;
+       size_t num_fds_in;
+       union __swrap_fds __fds_in = { .p = NULL, };
+       const int *fds_in = NULL;
+       size_t num_fds_out;
+       size_t size_fds_out;
+       union __swrap_fds __fds_out = { .p = NULL, };
+       int *fds_out = NULL;
+       size_t cmsg_len;
+       size_t cmsg_space;
+       size_t new_cm_data_space;
+       union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
+       struct cmsghdr *new_cmsg = NULL;
+       uint8_t *p = NULL;
+       size_t i;
+       int pipefd[2] = { -1, -1 };
+       int rc;
+       ssize_t sret;
+
+       /*
+        * We pass this as a buffer to the kernel, so make sure any padding
+        * is also cleared.
+        */
+       ZERO_STRUCT(info);
+       info.magic = swrap_unix_scm_right_magic;
+       memcpy(info.package_name,
+              SOCKET_WRAPPER_PACKAGE,
+              sizeof(info.package_name));
+       memcpy(info.package_version,
+              SOCKET_WRAPPER_VERSION,
+              sizeof(info.package_version));
+       info.full_size = sizeof(info);
+       info.payload_size = sizeof(info.payload);
+       payload = &info.payload;
+
+       if (*scm_rights_pipe_fd != -1) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "Two SCM_RIGHTS headers are not supported by socket_wrapper");
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (cmsg->cmsg_len < CMSG_LEN(0)) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
+                         (size_t)cmsg->cmsg_len,
+                         CMSG_LEN(0));
+               errno = EINVAL;
+               return -1;
+       }
+       size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
+       if ((size_fds_in % sizeof(int)) != 0) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
+                         (size_t)cmsg->cmsg_len,
+                         size_fds_in,
+                         sizeof(int));
+               errno = EINVAL;
+               return -1;
+       }
+       num_fds_in = size_fds_in / sizeof(int);
+       if (num_fds_in > SWRAP_MAX_PASSED_FDS) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
+                         "num_fds_in=%zu > "
+                         "SWRAP_MAX_PASSED_FDS(%zu)",
+                         (size_t)cmsg->cmsg_len,
+                         size_fds_in,
+                         num_fds_in,
+                         SWRAP_MAX_PASSED_FDS);
+               errno = EINVAL;
+               return -1;
+       }
+       if (num_fds_in == 0) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
+                         "num_fds_in=%zu",
+                         (size_t)cmsg->cmsg_len,
+                         size_fds_in,
+                         num_fds_in);
+               errno = EINVAL;
+               return -1;
+       }
+       __fds_in.p = CMSG_DATA(cmsg);
+       fds_in = __fds_in.fds;
+       num_fds_out = num_fds_in + 1;
+
+       SWRAP_LOG(SWRAP_LOG_TRACE,
+                 "num_fds_in=%zu num_fds_out=%zu",
+                 num_fds_in, num_fds_out);
+
+       size_fds_out = sizeof(int) * num_fds_out;
+       cmsg_len = CMSG_LEN(size_fds_out);
+       cmsg_space = CMSG_SPACE(size_fds_out);
+
+       new_cm_data_space = *cm_data_space + cmsg_space;
+
+       p = realloc((*cm_data), new_cm_data_space);
+       if (p == NULL) {
+               return -1;
+       }
+       (*cm_data) = p;
+       p = (*cm_data) + (*cm_data_space);
+       memset(p, 0, cmsg_space);
+       __new_cmsg.p = p;
+       new_cmsg = __new_cmsg.cmsg;
+       *new_cmsg = *cmsg;
+       __fds_out.p = CMSG_DATA(new_cmsg);
+       fds_out = __fds_out.fds;
+       memcpy(fds_out, fds_in, size_fds_in);
+       new_cmsg->cmsg_len = cmsg->cmsg_len;
+
+       for (i = 0; i < num_fds_in; i++) {
+               size_t j;
+
+               payload->idxs[i] = -1;
+               payload->num_idxs++;
+
+               si_idx_array[i] = find_socket_info_index(fds_in[i]);
+               if (si_idx_array[i] == -1) {
+                       continue;
+               }
+
+               si_array[i] = swrap_get_socket_info(si_idx_array[i]);
+               if (si_array[i] == NULL) {
+                       SWRAP_LOG(SWRAP_LOG_ERROR,
+                                 "fds_in[%zu]=%d si_idx_array[%zu]=%d missing!",
+                                 i, fds_in[i], i, si_idx_array[i]);
+                       errno = EINVAL;
+                       return -1;
+               }
+
+               for (j = 0; j < i; j++) {
+                       if (si_array[j] == si_array[i]) {
+                               payload->idxs[i] = payload->idxs[j];
+                               break;
+                       }
+               }
+               if (payload->idxs[i] == -1) {
+                       if (info_idx >= SWRAP_MAX_PASSED_SOCKET_INFO) {
+                               SWRAP_LOG(SWRAP_LOG_ERROR,
+                                         "fds_in[%zu]=%d,si_idx_array[%zu]=%d: "
+                                         "info_idx=%zu >= SWRAP_MAX_PASSED_FDS(%zu)!",
+                                         i, fds_in[i], i, si_idx_array[i],
+                                         info_idx,
+                                         SWRAP_MAX_PASSED_SOCKET_INFO);
+                               errno = EINVAL;
+                               return -1;
+                       }
+                       payload->idxs[i] = info_idx;
+                       info_idx += 1;
+                       continue;
+               }
+       }
+
+       for (i = 0; i < num_fds_in; i++) {
+               struct socket_info *si = si_array[i];
+
+               if (si == NULL) {
+                       SWRAP_LOG(SWRAP_LOG_TRACE,
+                                 "fds_in[%zu]=%d not an inet socket",
+                                 i, fds_in[i]);
+                       continue;
+               }
+
+               SWRAP_LOG(SWRAP_LOG_TRACE,
+                         "fds_in[%zu]=%d si_idx_array[%zu]=%d "
+                         "passing as info.idxs[%zu]=%d!",
+                         i, fds_in[i],
+                         i, si_idx_array[i],
+                         i, payload->idxs[i]);
+
+               SWRAP_LOCK_SI(si);
+               si->fd_passed += 1;
+               payload->infos[payload->idxs[i]] = *si;
+               payload->infos[payload->idxs[i]].fd_passed = 0;
+               SWRAP_UNLOCK_SI(si);
+       }
+
+       rc = pipe(pipefd);
+       if (rc == -1) {
+               int saved_errno = errno;
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "pipe() failed - %d %s",
+                         saved_errno,
+                         strerror(saved_errno));
+               swrap_dec_fd_passed_array(num_fds_in, si_array);
+               errno = saved_errno;
+               return -1;
+       }
+
+       sret = libc_write(pipefd[1], &info, sizeof(info));
+       if (sret != sizeof(info)) {
+               int saved_errno = errno;
+               if (sret != -1) {
+                       saved_errno = EINVAL;
+               }
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "write() failed - sret=%zd - %d %s",
+                         sret, saved_errno,
+                         strerror(saved_errno));
+               swrap_dec_fd_passed_array(num_fds_in, si_array);
+               libc_close(pipefd[1]);
+               libc_close(pipefd[0]);
+               errno = saved_errno;
+               return -1;
+       }
+       libc_close(pipefd[1]);
+
+       /*
+        * Add the pipe read end to the end of the passed fd array
+        */
+       fds_out[num_fds_in] = pipefd[0];
+       new_cmsg->cmsg_len = cmsg_len;
+
+       /* we're done ... */
+       *scm_rights_pipe_fd = pipefd[0];
+       *cm_data_space = new_cm_data_space;
+
+       return 0;
+}
+
+/*
+ * Dispatch one SOL_SOCKET control message of an outgoing sendmsg() call.
+ *
+ * SCM_RIGHTS messages are handed to swrap_sendmsg_unix_scm_rights(),
+ * every other message type is handed to swrap_sendmsg_copy_cmsg().
+ * The return value of the selected helper is passed through unchanged.
+ */
+static int swrap_sendmsg_unix_sol_socket(struct cmsghdr *cmsg,
+                                        uint8_t **cm_data,
+                                        size_t *cm_data_space,
+                                        int *scm_rights_pipe_fd)
+{
+       if (cmsg->cmsg_type == SCM_RIGHTS) {
+               return swrap_sendmsg_unix_scm_rights(cmsg,
+                                                    cm_data,
+                                                    cm_data_space,
+                                                    scm_rights_pipe_fd);
+       }
+
+       return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
+}
+
+/*
+ * Process one received SCM_RIGHTS control message.
+ *
+ * The last fd of the received array is treated as the read end of a
+ * pipe carrying a struct swrap_unix_scm_rights. That fd is stripped
+ * from the array, the structure is read from it and validated (magic,
+ * package name/version, sizes, number of indexes), and for every
+ * remaining fd that refers to a socket_info in the payload an entry is
+ * registered via swrap_add_socket_info()/set_socket_info_index().
+ *
+ * The rewritten cmsg (without the pipe fd) is appended to *cm_data and
+ * *cm_data_space is advanced only on success.
+ *
+ * Returns 0 on success, -1 with errno set on failure.
+ *
+ * NOTE(review): the length checks at the top return without closing
+ * any of the received fds - confirm this is intended.
+ */
+static int swrap_recvmsg_unix_scm_rights(struct cmsghdr *cmsg,
+                                        uint8_t **cm_data,
+                                        size_t *cm_data_space)
+{
+       int scm_rights_pipe_fd = -1;
+       struct swrap_unix_scm_rights info;
+       struct swrap_unix_scm_rights_payload *payload = NULL;
+       int si_idx_array[SWRAP_MAX_PASSED_FDS];
+       size_t size_fds_in;
+       size_t num_fds_in;
+       union __swrap_fds __fds_in = { .p = NULL, };
+       const int *fds_in = NULL;
+       size_t num_fds_out;
+       size_t size_fds_out;
+       union __swrap_fds __fds_out = { .p = NULL, };
+       int *fds_out = NULL;
+       size_t cmsg_len;
+       size_t cmsg_space;
+       size_t new_cm_data_space;
+       union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
+       struct cmsghdr *new_cmsg = NULL;
+       uint8_t *p = NULL;
+       ssize_t sret;
+       size_t i;
+       int cmp;
+
+       if (cmsg->cmsg_len < CMSG_LEN(0)) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
+                         (size_t)cmsg->cmsg_len,
+                         CMSG_LEN(0));
+               errno = EINVAL;
+               return -1;
+       }
+       /* The payload has to be a whole number of ints (fds) */
+       size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
+       if ((size_fds_in % sizeof(int)) != 0) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
+                         (size_t)cmsg->cmsg_len,
+                         size_fds_in,
+                         sizeof(int));
+               errno = EINVAL;
+               return -1;
+       }
+       /* One extra slot is allowed for the trailing pipe fd */
+       num_fds_in = size_fds_in / sizeof(int);
+       if (num_fds_in > (SWRAP_MAX_PASSED_FDS + 1)) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
+                         "num_fds_in=%zu > SWRAP_MAX_PASSED_FDS+1(%zu)",
+                         (size_t)cmsg->cmsg_len,
+                         size_fds_in,
+                         num_fds_in,
+                         SWRAP_MAX_PASSED_FDS+1);
+               errno = EINVAL;
+               return -1;
+       }
+       /* At least one passed fd plus the pipe fd is required */
+       if (num_fds_in <= 1) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
+                         "num_fds_in=%zu",
+                         (size_t)cmsg->cmsg_len,
+                         size_fds_in,
+                         num_fds_in);
+               errno = EINVAL;
+               return -1;
+       }
+       __fds_in.p = CMSG_DATA(cmsg);
+       fds_in = __fds_in.fds;
+       num_fds_out = num_fds_in - 1;
+
+       SWRAP_LOG(SWRAP_LOG_TRACE,
+                 "num_fds_in=%zu num_fds_out=%zu",
+                 num_fds_in, num_fds_out);
+
+       for (i = 0; i < num_fds_in; i++) {
+               /* Check if we have a stale fd and remove it */
+               swrap_remove_stale(fds_in[i]);
+       }
+
+       /* The last received fd is taken as the pipe carrying the info */
+       scm_rights_pipe_fd = fds_in[num_fds_out];
+       size_fds_out = sizeof(int) * num_fds_out;
+       cmsg_len = CMSG_LEN(size_fds_out);
+       cmsg_space = CMSG_SPACE(size_fds_out);
+
+       new_cm_data_space = *cm_data_space + cmsg_space;
+
+       p = realloc((*cm_data), new_cm_data_space);
+       if (p == NULL) {
+               swrap_close_fd_array(num_fds_in, fds_in);
+               return -1;
+       }
+       (*cm_data) = p;
+       /* Build the outgoing cmsg behind the data collected so far */
+       p = (*cm_data) + (*cm_data_space);
+       memset(p, 0, cmsg_space);
+       __new_cmsg.p = p;
+       new_cmsg = __new_cmsg.cmsg;
+       *new_cmsg = *cmsg;
+       __fds_out.p = CMSG_DATA(new_cmsg);
+       fds_out = __fds_out.fds;
+       /* Only the first num_fds_out fds are copied, dropping the pipe fd */
+       memcpy(fds_out, fds_in, size_fds_out);
+       new_cmsg->cmsg_len = cmsg_len;
+
+       sret = read(scm_rights_pipe_fd, &info, sizeof(info));
+       if (sret != sizeof(info)) {
+               int saved_errno = errno;
+               if (sret != -1) {
+                       saved_errno = EINVAL;
+               }
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "read() failed - sret=%zd - %d %s",
+                         sret, saved_errno,
+                         strerror(saved_errno));
+               /* fds_in still includes the pipe fd, so it is closed too */
+               swrap_close_fd_array(num_fds_in, fds_in);
+               errno = saved_errno;
+               return -1;
+       }
+       /* From here on only the num_fds_out entries of fds_out remain open */
+       libc_close(scm_rights_pipe_fd);
+       payload = &info.payload;
+
+       if (info.magic != swrap_unix_scm_right_magic) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "info.magic=0x%llx != swrap_unix_scm_right_magic=0x%llx",
+                         (unsigned long long)info.magic,
+                         (unsigned long long)swrap_unix_scm_right_magic);
+               swrap_close_fd_array(num_fds_out, fds_out);
+               errno = EINVAL;
+               return -1;
+       }
+
+       cmp = memcmp(info.package_name,
+                    SOCKET_WRAPPER_PACKAGE,
+                    sizeof(info.package_name));
+       if (cmp != 0) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "info.package_name='%.*s' != '%s'",
+                         (int)sizeof(info.package_name),
+                         info.package_name,
+                         SOCKET_WRAPPER_PACKAGE);
+               swrap_close_fd_array(num_fds_out, fds_out);
+               errno = EINVAL;
+               return -1;
+       }
+
+       cmp = memcmp(info.package_version,
+                    SOCKET_WRAPPER_VERSION,
+                    sizeof(info.package_version));
+       if (cmp != 0) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "info.package_version='%.*s' != '%s'",
+                         (int)sizeof(info.package_version),
+                         info.package_version,
+                         SOCKET_WRAPPER_VERSION);
+               swrap_close_fd_array(num_fds_out, fds_out);
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (info.full_size != sizeof(info)) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "info.full_size=%zu != sizeof(info)=%zu",
+                         (size_t)info.full_size,
+                         sizeof(info));
+               swrap_close_fd_array(num_fds_out, fds_out);
+               errno = EINVAL;
+               return -1;
+       }
+
+       if (info.payload_size != sizeof(info.payload)) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "info.payload_size=%zu != sizeof(info.payload)=%zu",
+                         (size_t)info.payload_size,
+                         sizeof(info.payload));
+               swrap_close_fd_array(num_fds_out, fds_out);
+               errno = EINVAL;
+               return -1;
+       }
+
+       /* The payload has to describe exactly the fds that remain */
+       if (payload->num_idxs != num_fds_out) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "info.num_idxs=%u != num_fds_out=%zu",
+                         payload->num_idxs, num_fds_out);
+               swrap_close_fd_array(num_fds_out, fds_out);
+               errno = EINVAL;
+               return -1;
+       }
+
+       /*
+        * NOTE(review): the validation failures inside this loop return
+        * without calling swrap_undo_si_idx_array() for entries added in
+        * earlier iterations, unlike the swrap_add_socket_info() failure
+        * path further down - confirm whether that is intended.
+        */
+       for (i = 0; i < num_fds_out; i++) {
+               size_t j;
+
+               si_idx_array[i] = -1;
+
+               /* An index of -1 marks an fd without socket_info */
+               if (payload->idxs[i] == -1) {
+                       SWRAP_LOG(SWRAP_LOG_TRACE,
+                                 "fds_out[%zu]=%d not an inet socket",
+                                 i, fds_out[i]);
+                       continue;
+               }
+
+               if (payload->idxs[i] < 0) {
+                       SWRAP_LOG(SWRAP_LOG_ERROR,
+                                 "fds_out[%zu]=%d info.idxs[%zu]=%d < 0!",
+                                 i, fds_out[i], i, payload->idxs[i]);
+                       swrap_close_fd_array(num_fds_out, fds_out);
+                       errno = EINVAL;
+                       return -1;
+               }
+
+               /*
+                * NOTE(review): idxs[i] is later used to index
+                * payload->infos[]; the bound checked here is num_idxs -
+                * confirm it can never exceed the size of infos[].
+                */
+               if (payload->idxs[i] >= payload->num_idxs) {
+                       SWRAP_LOG(SWRAP_LOG_ERROR,
+                                 "fds_out[%zu]=%d info.idxs[%zu]=%d >= %u!",
+                                 i, fds_out[i], i, payload->idxs[i],
+                                 payload->num_idxs);
+                       swrap_close_fd_array(num_fds_out, fds_out);
+                       errno = EINVAL;
+                       return -1;
+               }
+
+               if ((size_t)fds_out[i] >= socket_fds_max) {
+                       SWRAP_LOG(SWRAP_LOG_ERROR,
+                                 "The max socket index limit of %zu has been reached, "
+                                 "trying to add %d",
+                                 socket_fds_max,
+                                 fds_out[i]);
+                       swrap_close_fd_array(num_fds_out, fds_out);
+                       errno = EMFILE;
+                       return -1;
+               }
+
+               SWRAP_LOG(SWRAP_LOG_TRACE,
+                         "fds_in[%zu]=%d "
+                         "received as info.idxs[%zu]=%d!",
+                         i, fds_out[i],
+                         i, payload->idxs[i]);
+
+               /*
+                * Reuse the socket_info index of an earlier fd that
+                * referenced the same payload info.
+                */
+               for (j = 0; j < i; j++) {
+                       if (payload->idxs[j] == -1) {
+                               continue;
+                       }
+                       if (payload->idxs[j] == payload->idxs[i]) {
+                               si_idx_array[i] = si_idx_array[j];
+                       }
+               }
+               /* First reference to this payload info: add a new entry */
+               if (si_idx_array[i] == -1) {
+                       const struct socket_info *si = &payload->infos[payload->idxs[i]];
+
+                       si_idx_array[i] = swrap_add_socket_info(si);
+                       if (si_idx_array[i] == -1) {
+                               int saved_errno = errno;
+                               SWRAP_LOG(SWRAP_LOG_ERROR,
+                                         "The max socket index limit of %zu has been reached, "
+                                         "trying to add %d",
+                                         socket_fds_max,
+                                         fds_out[i]);
+                               swrap_undo_si_idx_array(i, si_idx_array);
+                               swrap_close_fd_array(num_fds_out, fds_out);
+                               errno = saved_errno;
+                               return -1;
+                       }
+                       SWRAP_LOG(SWRAP_LOG_TRACE,
+                                 "Imported %s socket for protocol %s, fd=%d",
+                                 si->family == AF_INET ? "IPv4" : "IPv6",
+                                 si->type == SOCK_DGRAM ? "UDP" : "TCP",
+                                 fds_out[i]);
+               }
+       }
+
+       /* All entries validated: attach each fd to its socket_info index */
+       for (i = 0; i < num_fds_out; i++) {
+               if (si_idx_array[i] == -1) {
+                       continue;
+               }
+               set_socket_info_index(fds_out[i], si_idx_array[i]);
+       }
+
+       /* we're done ... */
+       *cm_data_space = new_cm_data_space;
+
+       return 0;
+}
+
+/*
+ * Dispatch one SOL_SOCKET control message of a completed recvmsg() call.
+ *
+ * SCM_RIGHTS messages are handed to swrap_recvmsg_unix_scm_rights(),
+ * every other message type is handed to swrap_sendmsg_copy_cmsg().
+ * The return value of the selected helper is passed through unchanged.
+ */
+static int swrap_recvmsg_unix_sol_socket(struct cmsghdr *cmsg,
+                                        uint8_t **cm_data,
+                                        size_t *cm_data_space)
+{
+       if (cmsg->cmsg_type == SCM_RIGHTS) {
+               return swrap_recvmsg_unix_scm_rights(cmsg,
+                                                    cm_data,
+                                                    cm_data_space);
+       }
+
+       return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
+}
+
+#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+
+/*
+ * Prepare the control data of a sendmsg() call on a socket that has no
+ * socket_info (the !si path of swrap_sendmsg()).
+ *
+ * msg_tmp becomes a copy of _msg_in. If the caller supplied control
+ * data, every cmsg header is rebuilt into a freshly allocated buffer:
+ * SOL_SOCKET headers go through swrap_sendmsg_unix_sol_socket(), all
+ * others through swrap_sendmsg_copy_cmsg(). The new buffer replaces
+ * msg_tmp->msg_control/msg_controllen and is meant to be released by
+ * swrap_sendmsg_after_unix().
+ *
+ * *scm_rights_pipe_fd is set to -1 and may be replaced by a pipe fd
+ * while an SCM_RIGHTS header is processed.
+ *
+ * Returns 0 on success. On failure the negative result of the failing
+ * helper is returned with its errno preserved, the temporary buffer is
+ * freed and a pipe fd created so far is closed again, because the
+ * caller returns without calling swrap_sendmsg_after_unix().
+ *
+ * NOTE(review): fd_passed counters incremented for an earlier
+ * SCM_RIGHTS header are not rolled back on a later failure.
+ */
+static int swrap_sendmsg_before_unix(const struct msghdr *_msg_in,
+                                    struct msghdr *msg_tmp,
+                                    int *scm_rights_pipe_fd)
+{
+#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
+       struct msghdr *msg_in = discard_const_p(struct msghdr, _msg_in);
+       struct cmsghdr *cmsg = NULL;
+       uint8_t *cm_data = NULL;
+       size_t cm_data_space = 0;
+       int rc = -1;
+
+       *msg_tmp = *msg_in;
+       *scm_rights_pipe_fd = -1;
+
+       /* Nothing to do */
+       if (msg_in->msg_controllen == 0 || msg_in->msg_control == NULL) {
+               /*
+                * swrap_sendmsg_after_unix() frees msg_tmp->msg_control
+                * unconditionally, so it must never point at the
+                * caller's own buffer (msg_control != NULL together
+                * with msg_controllen == 0).
+                */
+               msg_tmp->msg_control = NULL;
+               return 0;
+       }
+
+       for (cmsg = CMSG_FIRSTHDR(msg_in);
+            cmsg != NULL;
+            cmsg = CMSG_NXTHDR(msg_in, cmsg)) {
+               switch (cmsg->cmsg_level) {
+               case SOL_SOCKET:
+                       rc = swrap_sendmsg_unix_sol_socket(cmsg,
+                                                          &cm_data,
+                                                          &cm_data_space,
+                                                          scm_rights_pipe_fd);
+                       break;
+
+               default:
+                       rc = swrap_sendmsg_copy_cmsg(cmsg,
+                                                    &cm_data,
+                                                    &cm_data_space);
+                       break;
+               }
+               if (rc < 0) {
+                       int saved_errno = errno;
+                       SAFE_FREE(cm_data);
+                       /*
+                        * A pipe created for an earlier SCM_RIGHTS
+                        * header would leak otherwise, nobody else
+                        * closes it on this path.
+                        */
+                       if (*scm_rights_pipe_fd != -1) {
+                               libc_close(*scm_rights_pipe_fd);
+                               *scm_rights_pipe_fd = -1;
+                       }
+                       errno = saved_errno;
+                       return rc;
+               }
+       }
+
+       msg_tmp->msg_controllen = cm_data_space;
+       msg_tmp->msg_control = cm_data;
+
+       return 0;
+#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+       *msg_tmp = *_msg_in;
+       *scm_rights_pipe_fd = -1;
+       return 0;
+#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+}
+
+/*
+ * Clean up after a sendmsg() call that was prepared with
+ * swrap_sendmsg_before_unix().
+ *
+ * Closes scm_rights_pipe_fd unless it is -1 and releases
+ * msg_tmp->msg_control. The control buffer is freed unconditionally,
+ * so it is expected to be NULL or heap memory owned by the wrapper.
+ * errno is left as it was on entry and ret is returned unchanged.
+ */
+static ssize_t swrap_sendmsg_after_unix(struct msghdr *msg_tmp,
+                                       ssize_t ret,
+                                       int scm_rights_pipe_fd)
+{
+#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
+       const int errno_on_entry = errno;
+
+       if (scm_rights_pipe_fd != -1) {
+               libc_close(scm_rights_pipe_fd);
+       }
+       SAFE_FREE(msg_tmp->msg_control);
+
+       /* Neither close() nor free() may leak into the caller's errno */
+       errno = errno_on_entry;
+#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+       return ret;
+}
+
+/*
+ * Prepare the control buffer of a recvmsg() call.
+ *
+ * msg_tmp becomes a copy of msg_in. If the caller supplied a control
+ * buffer, msg_tmp gets a zeroed temporary buffer instead, enlarged by
+ * CMSG_SPACE(sizeof(int)) where that does not exceed INT32_MAX. The
+ * temporary buffer is also stored in *tmp_control; otherwise
+ * *tmp_control is NULL.
+ *
+ * Returns 0 on success and -1 if the allocation failed.
+ */
+static int swrap_recvmsg_before_unix(struct msghdr *msg_in,
+                                    struct msghdr *msg_tmp,
+                                    uint8_t **tmp_control)
+{
+#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
+       const size_t pipe_fd_space = CMSG_SPACE(sizeof(int));
+       size_t alloc_len = 0;
+       uint8_t *buf = NULL;
+
+       *tmp_control = NULL;
+       *msg_tmp = *msg_in;
+
+       SWRAP_LOG(SWRAP_LOG_TRACE,
+                 "msg_in->msg_controllen=%zu",
+                 (size_t)msg_in->msg_controllen);
+
+       /* Without a control buffer there is nothing to substitute */
+       if (msg_in->msg_control == NULL || msg_in->msg_controllen == 0) {
+               return 0;
+       }
+
+       /*
+        * The kernel needs a bit more room than the caller provided in
+        * order to deliver the pipe fd that swrap_sendmsg_before_unix()
+        * added on the sending side. swrap_recvmsg_after_unix() hides
+        * it again.
+        */
+       alloc_len = msg_in->msg_controllen;
+       if (alloc_len < (INT32_MAX - pipe_fd_space)) {
+               alloc_len += pipe_fd_space;
+       }
+
+       buf = calloc(1, alloc_len);
+       if (buf == NULL) {
+               return -1;
+       }
+
+       *tmp_control = buf;
+       msg_tmp->msg_control = buf;
+       msg_tmp->msg_controllen = alloc_len;
+
+       SWRAP_LOG(SWRAP_LOG_TRACE,
+                 "msg_tmp->msg_controllen=%zu",
+                 (size_t)msg_tmp->msg_controllen);
+       return 0;
+#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+       *tmp_control = NULL;
+       *msg_tmp = *msg_in;
+       return 0;
+#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+}
+
+/*
+ * Post-process a recvmsg() call that was issued with the msghdr set up
+ * by swrap_recvmsg_before_unix().
+ *
+ * If ret < 0 only the temporary control buffer is released and msg_out
+ * is left untouched. Otherwise the result in msg_tmp is copied to
+ * msg_out. Received control messages are rebuilt (SOL_SOCKET headers
+ * via swrap_recvmsg_unix_sol_socket(), all others via
+ * swrap_sendmsg_copy_cmsg()) and copied into the caller's control
+ * buffer, truncated to the caller's msg_controllen.
+ *
+ * msg_out->msg_control keeps pointing at the caller's buffer on every
+ * path and *tmp_control is always freed.
+ *
+ * Returns ret, or the negative helper result if rebuilding a control
+ * message failed (errno preserved from the helper).
+ */
+static ssize_t swrap_recvmsg_after_unix(struct msghdr *msg_tmp,
+                                       uint8_t **tmp_control,
+                                       struct msghdr *msg_out,
+                                       ssize_t ret)
+{
+#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
+       struct cmsghdr *cmsg = NULL;
+       uint8_t *cm_data = NULL;
+       size_t cm_data_space = 0;
+       int rc = -1;
+
+       if (ret < 0) {
+               int saved_errno = errno;
+               SWRAP_LOG(SWRAP_LOG_TRACE, "ret=%zd - %d - %s", ret,
+                         saved_errno, strerror(saved_errno));
+               SAFE_FREE(*tmp_control);
+               /* msg_out should not be touched on error */
+               errno = saved_errno;
+               return ret;
+       }
+
+       SWRAP_LOG(SWRAP_LOG_TRACE,
+                 "msg_tmp->msg_controllen=%zu",
+                 (size_t)msg_tmp->msg_controllen);
+
+       /* Nothing to do */
+       if (msg_tmp->msg_controllen == 0 || msg_tmp->msg_control == NULL) {
+               int saved_errno = errno;
+               void *caller_control = msg_out->msg_control;
+
+               *msg_out = *msg_tmp;
+               /*
+                * msg_tmp->msg_control may be the temporary buffer
+                * (*tmp_control) that is freed right below. The caller
+                * must get its own pointer back instead of a dangling
+                * one.
+                */
+               msg_out->msg_control = caller_control;
+               SAFE_FREE(*tmp_control);
+               errno = saved_errno;
+               return ret;
+       }
+
+       for (cmsg = CMSG_FIRSTHDR(msg_tmp);
+            cmsg != NULL;
+            cmsg = CMSG_NXTHDR(msg_tmp, cmsg)) {
+               switch (cmsg->cmsg_level) {
+               case SOL_SOCKET:
+                       rc = swrap_recvmsg_unix_sol_socket(cmsg,
+                                                          &cm_data,
+                                                          &cm_data_space);
+                       break;
+
+               default:
+                       rc = swrap_sendmsg_copy_cmsg(cmsg,
+                                                    &cm_data,
+                                                    &cm_data_space);
+                       break;
+               }
+               if (rc < 0) {
+                       int saved_errno = errno;
+                       SAFE_FREE(cm_data);
+                       SAFE_FREE(*tmp_control);
+                       errno = saved_errno;
+                       return rc;
+               }
+       }
+
+       /*
+        * msg_tmp->msg_control (*tmp_control) was created by
+        * swrap_recvmsg_before_unix() and msg_out->msg_control
+        * is still the buffer of the caller.
+        */
+       msg_tmp->msg_control = msg_out->msg_control;
+       msg_tmp->msg_controllen = msg_out->msg_controllen;
+       *msg_out = *msg_tmp;
+
+       cm_data_space = MIN(cm_data_space, msg_out->msg_controllen);
+       /*
+        * cm_data is NULL if no cmsg header was rebuilt; memcpy() must
+        * not be called with a NULL source, even for a length of 0.
+        */
+       if (cm_data_space > 0) {
+               memcpy(msg_out->msg_control, cm_data, cm_data_space);
+       }
+       msg_out->msg_controllen = cm_data_space;
+       SAFE_FREE(cm_data);
+       SAFE_FREE(*tmp_control);
+
+       SWRAP_LOG(SWRAP_LOG_TRACE,
+                 "msg_out->msg_controllen=%zu",
+                 (size_t)msg_out->msg_controllen);
+       return ret;
+#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+       int saved_errno = errno;
+       *msg_out = *msg_tmp;
+       SAFE_FREE(*tmp_control);
+       errno = saved_errno;
+       return ret;
+#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
+}
+
+static ssize_t swrap_sendmsg_before(int fd,
+                                   struct socket_info *si,
+                                   struct msghdr *msg,
+                                   struct iovec *tmp_iov,
+                                   struct sockaddr_un *tmp_un,
+                                   const struct sockaddr_un **to_un,
+                                   const struct sockaddr **to,
+                                   int *bcast)
+{
+       size_t i, len = 0;
+       ssize_t ret = -1;
+
+       if (to_un) {
+               *to_un = NULL;
+       }
+       if (to) {
+               *to = NULL;
+       }
+       if (bcast) {
+               *bcast = 0;
+       }
+
+       SWRAP_LOCK_SI(si);
+
+       switch (si->type) {
+       case SOCK_STREAM: {
+               unsigned long mtu;
+
+               if (!si->connected) {
+                       errno = ENOTCONN;
+                       goto out;
+               }
+
+               if (msg->msg_iovlen == 0) {
+                       break;
+               }
+
+               mtu = socket_wrapper_mtu();
+               for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
+                       size_t nlen;
+                       nlen = len + msg->msg_iov[i].iov_len;
+                       if (nlen < len) {
+                               /* overflow */
+                               errno = EMSGSIZE;
+                               goto out;
+                       }
+                       if (nlen > mtu) {
+                               break;
+                       }
+               }
+               msg->msg_iovlen = i;
+               if (msg->msg_iovlen == 0) {
+                       *tmp_iov = msg->msg_iov[0];
+                       tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
+                                              (size_t)mtu);
+                       msg->msg_iov = tmp_iov;
+                       msg->msg_iovlen = 1;
+               }
+               break;
+       }
+       case SOCK_DGRAM:
+               if (si->connected) {
+                       if (msg->msg_name != NULL) {
+                               /*
+                                * We are dealing with unix sockets and if we
+                                * are connected, we should only talk to the
+                                * connected unix path. Using the fd to send
+                                * to another server would be hard to achieve.
+                                */
+                               msg->msg_name = NULL;
+                               msg->msg_namelen = 0;
+                       }
+               } else {
+                       const struct sockaddr *msg_name;
+                       msg_name = (const struct sockaddr *)msg->msg_name;
+
+                       if (msg_name == NULL) {
+                               errno = ENOTCONN;
+                               goto out;
+                       }
+
+
+                       ret = sockaddr_convert_to_un(si, msg_name, msg->msg_namelen,
+                                                    tmp_un, 0, bcast);
+                       if (ret == -1) {
+                               goto out;
+                       }
+
+                       if (to_un) {
+                               *to_un = tmp_un;
+                       }
+                       if (to) {
+                               *to = msg_name;
+                       }
+                       msg->msg_name = tmp_un;
+                       msg->msg_namelen = sizeof(*tmp_un);
+               }
+
+               if (si->bound == 0) {
+                       ret = swrap_auto_bind(fd, si, si->family);
+                       if (ret == -1) {
+                               SWRAP_UNLOCK_SI(si);
+                               if (errno == ENOTSOCK) {
+                                       swrap_remove_stale(fd);
+                                       ret = -ENOTSOCK;
+                               } else {
+                                       SWRAP_LOG(SWRAP_LOG_ERROR, "swrap_sendmsg_before failed");
+                               }
+                               return ret;
+                       }
                }
 
                if (!si->defer_connect) {
@@ -5344,9 +6286,11 @@ static void swrap_sendmsg_after(int fd,
 
        for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
                size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
-               memcpy(buf + ofs,
-                      msg->msg_iov[i].iov_base,
-                      this_time);
+               if (this_time > 0) {
+                       memcpy(buf + ofs,
+                              msg->msg_iov[i].iov_base,
+                              this_time);
+               }
                ofs += this_time;
                remain -= this_time;
        }
@@ -6060,7 +7004,13 @@ static ssize_t swrap_recvmsg(int s, struct msghdr *omsg, int flags)
 
        si = find_socket_info(s);
        if (si == NULL) {
-               return libc_recvmsg(s, omsg, flags);
+               uint8_t *tmp_control = NULL;
+               rc = swrap_recvmsg_before_unix(omsg, &msg, &tmp_control);
+               if (rc < 0) {
+                       return rc;
+               }
+               ret = libc_recvmsg(s, &msg, flags);
+               return swrap_recvmsg_after_unix(&msg, &tmp_control, omsg, ret);
        }
 
        tmp.iov_base = NULL;
@@ -6183,7 +7133,15 @@ static ssize_t swrap_sendmsg(int s, const struct msghdr *omsg, int flags)
        int bcast = 0;
 
        if (!si) {
-               return libc_sendmsg(s, omsg, flags);
+               int scm_rights_pipe_fd = -1;
+
+               rc = swrap_sendmsg_before_unix(omsg, &msg,
+                                              &scm_rights_pipe_fd);
+               if (rc < 0) {
+                       return rc;
+               }
+               ret = libc_sendmsg(s, &msg, flags);
+               return swrap_sendmsg_after_unix(&msg, ret, scm_rights_pipe_fd);
        }
 
        ZERO_STRUCT(un_addr);
@@ -6441,10 +7399,13 @@ ssize_t writev(int s, const struct iovec *vector, int count)
  * CLOSE
  ***************************/
 
-static int swrap_close(int fd)
+static int swrap_remove_wrapper(const char *__func_name,
+                               int (*__close_fd_fn)(int fd),
+                               int fd)
 {
        struct socket_info *si = NULL;
        int si_index;
+       int ret_errno = errno;
        int ret;
 
        swrap_mutex_lock(&socket_reset_mutex);
@@ -6452,10 +7413,10 @@ static int swrap_close(int fd)
        si_index = find_socket_info_index(fd);
        if (si_index == -1) {
                swrap_mutex_unlock(&socket_reset_mutex);
-               return libc_close(fd);
+               return __close_fd_fn(fd);
        }
 
-       SWRAP_LOG(SWRAP_LOG_TRACE, "Close wrapper for fd=%d", fd);
+       swrap_log(SWRAP_LOG_TRACE, __func_name, "Remove wrapper for fd=%d", fd);
        reset_socket_info_index(fd);
 
        si = swrap_get_socket_info(si_index);
@@ -6463,7 +7424,10 @@ static int swrap_close(int fd)
        swrap_mutex_lock(&first_free_mutex);
        SWRAP_LOCK_SI(si);
 
-       ret = libc_close(fd);
+       ret = __close_fd_fn(fd);
+       if (ret == -1) {
+               ret_errno = errno;
+       }
 
        swrap_dec_refcount(si);
 
@@ -6472,6 +7436,10 @@ static int swrap_close(int fd)
                goto out;
        }
 
+       if (si->fd_passed) {
+               goto set_next_free;
+       }
+
        if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
                swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_SEND, NULL, 0);
        }
@@ -6485,6 +7453,7 @@ static int swrap_close(int fd)
                unlink(si->un_addr.sun_path);
        }
 
+set_next_free:
        swrap_set_next_free(si, first_free);
        first_free = si_index;
 
@@ -6493,14 +7462,68 @@ out:
        swrap_mutex_unlock(&first_free_mutex);
        swrap_mutex_unlock(&socket_reset_mutex);
 
+       errno = ret_errno;
        return ret;
 }
 
+static int swrap_noop_close(int fd)
+{
+       (void)fd; /* unused: this callback deliberately leaves the fd open */
+       return 0; /* always reports success to swrap_remove_wrapper() */
+}
+
+static void swrap_remove_stale(int fd)
+{
+       swrap_remove_wrapper(__func__, swrap_noop_close, fd); /* drop any tracked entry for fd; the fd itself stays open */
+}
+
+/*
+ * This allows socket_wrapper aware applications to
+ * indicate that the given fd does not belong to
+ * an inet socket. The fd itself is not closed; only
+ * a tracked entry for it, if any, is removed.
+ *
+ * We already overload a lot of unrelated functions
+ * like eventfd(), timerfd_create(), ... in order to
+ * call swrap_remove_stale() on the returned fd, but
+ * we'll never be able to handle all possible syscalls.
+ * socket_wrapper_indicate_no_inet_fd() gives such
+ * applications a way to do the same themselves.
+ *
+ * We don't export swrap_remove_stale() itself: passing
+ * our own __func__ keeps application requests apart from
+ * internal ones in SOCKET_WRAPPER_DEBUGLEVEL=3 log files.
+ */
+void socket_wrapper_indicate_no_inet_fd(int fd)
+{
+       swrap_remove_wrapper(__func__, swrap_noop_close, fd); /* result ignored: swrap_noop_close() always returns 0 */
+}
+
+static int swrap_close(int fd)
+{
+       return swrap_remove_wrapper(__func__, libc_close, fd); /* close via libc_close() and drop the tracked entry, if any */
+}
+
 int close(int fd)
 {
        return swrap_close(fd);
 }
 
+#ifdef HAVE___CLOSE_NOCANCEL
+
+static int swrap___close_nocancel(int fd)
+{
+       return swrap_remove_wrapper(__func__, libc___close_nocancel, fd); /* same path as swrap_close(), but closing via libc___close_nocancel() */
+}
+
+int __close_nocancel(int fd); /* NOTE(review): presumably declared here because no included header provides a prototype - confirm */
+int __close_nocancel(int fd)
+{
+       return swrap___close_nocancel(fd); /* exported override; all work happens in the static wrapper */
+}
+
+#endif /* HAVE___CLOSE_NOCANCEL */
+
 /****************************
  * DUP
  ***************************/
@@ -6524,6 +7547,17 @@ static int swrap_dup(int fd)
                return -1;
        }
 
+       if ((size_t)dup_fd >= socket_fds_max) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "The max socket index limit of %zu has been reached, "
+                         "trying to add %d",
+                         socket_fds_max,
+                         dup_fd);
+               libc_close(dup_fd);
+               errno = EMFILE;
+               return -1;
+       }
+
        SWRAP_LOCK_SI(si);
 
        swrap_inc_refcount(si);
@@ -6569,6 +7603,16 @@ static int swrap_dup2(int fd, int newfd)
                return newfd;
        }
 
+       if ((size_t)newfd >= socket_fds_max) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "The max socket index limit of %zu has been reached, "
+                         "trying to add %d",
+                         socket_fds_max,
+                         newfd);
+               errno = EMFILE;
+               return -1;
+       }
+
        if (find_socket_info(newfd)) {
                /* dup2() does an implicit close of newfd, which we
                 * need to emulate */
@@ -6626,14 +7670,26 @@ static int swrap_vfcntl(int fd, int cmd, va_list va)
                        return -1;
                }
 
+               /* Make sure we don't have an entry for the fd */
+               swrap_remove_stale(dup_fd);
+
+               if ((size_t)dup_fd >= socket_fds_max) {
+                       SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "The max socket index limit of %zu has been reached, "
+                         "trying to add %d",
+                         socket_fds_max,
+                         dup_fd);
+                       libc_close(dup_fd);
+                       errno = EMFILE;
+                       return -1;
+               }
+
                SWRAP_LOCK_SI(si);
 
                swrap_inc_refcount(si);
 
                SWRAP_UNLOCK_SI(si);
 
-               /* Make sure we don't have an entry for the fd */
-               swrap_remove_stale(dup_fd);
 
                set_socket_info_index(dup_fd, idx);
 
@@ -6727,6 +7783,19 @@ static void swrap_thread_child(void)
  ***************************/
 void swrap_constructor(void)
 {
+       if (PIPE_BUF < sizeof(struct swrap_unix_scm_rights)) {
+               SWRAP_LOG(SWRAP_LOG_ERROR,
+                         "PIPE_BUF=%zu < "
+                         "sizeof(struct swrap_unix_scm_rights)=%zu\n"
+                         "sizeof(struct swrap_unix_scm_rights_payload)=%zu "
+                         "sizeof(struct socket_info)=%zu",
+                         (size_t)PIPE_BUF,
+                         sizeof(struct swrap_unix_scm_rights),
+                         sizeof(struct swrap_unix_scm_rights_payload),
+                         sizeof(struct socket_info));
+               exit(-1);
+       }
+
        SWRAP_REINIT_ALL;
 
        /*
@@ -6762,10 +7831,18 @@ void swrap_destructor(void)
 
        SAFE_FREE(sockets);
 
-       if (swrap.libc.handle != NULL) {
+       if (swrap.libc.handle != NULL
+#ifdef RTLD_NEXT
+           && swrap.libc.handle != RTLD_NEXT
+#endif
+                       ) {
                dlclose(swrap.libc.handle);
        }
-       if (swrap.libc.socket_handle) {
+       if (swrap.libc.socket_handle
+#ifdef RTLD_NEXT
+           && swrap.libc.socket_handle != RTLD_NEXT
+#endif
+                       ) {
                dlclose(swrap.libc.socket_handle);
        }
 }
@@ -6783,8 +7860,8 @@ void swrap_destructor(void)
  * related syscalls also with the '_' prefix.
  *
  * This is tested in Samba's 'make test',
- * there we noticed that providing '_read'
- * and '_open' would cause errors, which
+ * there we noticed that providing '_read',
+ * '_open' and '_close' would cause errors, which
  * means we skip '_read', '_write' and
  * all non socket related calls without
  * further analyzing the problem.
@@ -6797,7 +7874,6 @@ SWRAP_SYMBOL_ALIAS(accept4, _accept4);
 #endif
 SWRAP_SYMBOL_ALIAS(accept, _accept);
 SWRAP_SYMBOL_ALIAS(bind, _bind);
-SWRAP_SYMBOL_ALIAS(close, _close);
 SWRAP_SYMBOL_ALIAS(connect, _connect);
 SWRAP_SYMBOL_ALIAS(dup, _dup);
 SWRAP_SYMBOL_ALIAS(dup2, _dup2);