44cfad8c6cfd4790ba82a3a76cc91799a28c7466
[socket_wrapper.git] / src / socket_wrapper.c
1 /*
2  * BSD 3-Clause License
3  *
4  * Copyright (c) 2005-2008, Jelmer Vernooij <jelmer@samba.org>
5  * Copyright (c) 2006-2021, Stefan Metzmacher <metze@samba.org>
6  * Copyright (c) 2013-2021, Andreas Schneider <asn@samba.org>
7  * Copyright (c) 2014-2017, Michael Adam <obnox@samba.org>
8  * Copyright (c) 2016-2018, Anoop C S <anoopcs@redhat.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * 3. Neither the name of the author nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38
39 /*
40    Socket wrapper library. Passes all socket communication over
41    unix domain sockets if the environment variable SOCKET_WRAPPER_DIR
42    is set.
43 */
44
45 #include "config.h"
46
47 #include <sys/types.h>
48 #include <sys/time.h>
49 #include <sys/stat.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #ifdef HAVE_SYS_FILIO_H
53 #include <sys/filio.h>
54 #endif
55 #ifdef HAVE_SYS_SIGNALFD_H
56 #include <sys/signalfd.h>
57 #endif
58 #ifdef HAVE_SYS_EVENTFD_H
59 #include <sys/eventfd.h>
60 #endif
61 #ifdef HAVE_SYS_TIMERFD_H
62 #include <sys/timerfd.h>
63 #endif
64 #include <sys/uio.h>
65 #include <errno.h>
66 #include <sys/un.h>
67 #include <netinet/in.h>
68 #include <netinet/tcp.h>
69 #ifdef HAVE_NETINET_TCP_FSM_H
70 #include <netinet/tcp_fsm.h>
71 #endif
72 #include <arpa/inet.h>
73 #include <fcntl.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <stdio.h>
77 #include <stdint.h>
78 #include <stdarg.h>
79 #include <stdbool.h>
80 #include <unistd.h>
81 #ifdef HAVE_GNU_LIB_NAMES_H
82 #include <gnu/lib-names.h>
83 #endif
84 #ifdef HAVE_RPC_RPC_H
85 #include <rpc/rpc.h>
86 #endif
87 #include <pthread.h>
88
89 #include "socket_wrapper.h"
90
/*
 * Severity of a log message. swrap_log() drops a message whose level is
 * greater than the value of SOCKET_WRAPPER_DEBUGLEVEL (0 when unset).
 */
91 enum swrap_dbglvl_e {
92         SWRAP_LOG_ERROR = 0, /* logged with the "SWRAP_ERROR" prefix */
93         SWRAP_LOG_WARN, /* logged with the "SWRAP_WARN" prefix */
94         SWRAP_LOG_DEBUG, /* logged with the "SWRAP_DEBUG" prefix */
95         SWRAP_LOG_TRACE /* logged with the "SWRAP_TRACE" prefix */
96 };
97
98 /* GCC has a printf format attribute check. */
99 #ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
100 #define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b)))
101 #else
102 #define PRINTF_ATTRIBUTE(a,b)
103 #endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */
104
105 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
106 #define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor))
107 #else
108 #define CONSTRUCTOR_ATTRIBUTE
109 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
110
111 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
112 #define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor))
113 #else
114 #define DESTRUCTOR_ATTRIBUTE
115 #endif
116
117 #ifndef FALL_THROUGH
118 # ifdef HAVE_FALLTHROUGH_ATTRIBUTE
119 #  define FALL_THROUGH __attribute__ ((fallthrough))
120 # else /* HAVE_FALLTHROUGH_ATTRIBUTE */
121 #  define FALL_THROUGH ((void)0)
122 # endif /* HAVE_FALLTHROUGH_ATTRIBUTE */
123 #endif /* FALL_THROUGH */
124
125 #ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
126 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address))
127 #else
128 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
129 #endif
130
131 #ifdef HAVE_GCC_THREAD_LOCAL_STORAGE
132 # define SWRAP_THREAD __thread
133 #else
134 # define SWRAP_THREAD
135 #endif
136
137 #ifndef MIN
138 #define MIN(a,b) ((a)<(b)?(a):(b))
139 #endif
140
141 #ifndef ZERO_STRUCT
142 #define ZERO_STRUCT(x) memset((char *)&(x), 0, sizeof(x))
143 #endif
144
145 #ifndef ZERO_STRUCTP
146 #define ZERO_STRUCTP(x) do { \
147                 if ((x) != NULL) \
148                         memset((char *)(x), 0, sizeof(*(x))); \
149         } while(0)
150 #endif
151
152 #ifndef SAFE_FREE
153 #define SAFE_FREE(x) do { if ((x) != NULL) {free(x); (x)=NULL;} } while(0)
154 #endif
155
156 #ifndef discard_const
157 #define discard_const(ptr) ((void *)((uintptr_t)(ptr)))
158 #endif
159
160 #ifndef discard_const_p
161 #define discard_const_p(type, ptr) ((type *)discard_const(ptr))
162 #endif
163
164 #define UNUSED(x) (void)(x)
165
166 #ifdef IPV6_PKTINFO
167 # ifndef IPV6_RECVPKTINFO
168 #  define IPV6_RECVPKTINFO IPV6_PKTINFO
169 # endif /* IPV6_RECVPKTINFO */
170 #endif /* IPV6_PKTINFO */
171
172 /*
173  * On BSD IP_PKTINFO has a different name because during
174  * the time when they implemented it, there was no RFC.
175  * The name for IPv6 is the same as on Linux.
176  */
177 #ifndef IP_PKTINFO
178 # ifdef IP_RECVDSTADDR
179 #  define IP_PKTINFO IP_RECVDSTADDR
180 # endif
181 #endif
182
183 #define socket_wrapper_init_mutex(m) \
184         _socket_wrapper_init_mutex(m, #m)
185
186 /* Add new global locks here please */
187 # define SWRAP_REINIT_ALL do { \
188         int ret; \
189         ret = socket_wrapper_init_mutex(&sockets_mutex); \
190         if (ret != 0) exit(-1); \
191         ret = socket_wrapper_init_mutex(&socket_reset_mutex); \
192         if (ret != 0) exit(-1); \
193         ret = socket_wrapper_init_mutex(&first_free_mutex); \
194         if (ret != 0) exit(-1); \
195         ret = socket_wrapper_init_mutex(&sockets_si_global); \
196         if (ret != 0) exit(-1); \
197         ret = socket_wrapper_init_mutex(&autobind_start_mutex); \
198         if (ret != 0) exit(-1); \
199         ret = socket_wrapper_init_mutex(&pcap_dump_mutex); \
200         if (ret != 0) exit(-1); \
201         ret = socket_wrapper_init_mutex(&mtu_update_mutex); \
202         if (ret != 0) exit(-1); \
203 } while(0)
204
205 # define SWRAP_LOCK_ALL do { \
206         swrap_mutex_lock(&sockets_mutex); \
207         swrap_mutex_lock(&socket_reset_mutex); \
208         swrap_mutex_lock(&first_free_mutex); \
209         swrap_mutex_lock(&sockets_si_global); \
210         swrap_mutex_lock(&autobind_start_mutex); \
211         swrap_mutex_lock(&pcap_dump_mutex); \
212         swrap_mutex_lock(&mtu_update_mutex); \
213 } while(0)
214
215 # define SWRAP_UNLOCK_ALL do { \
216         swrap_mutex_unlock(&mtu_update_mutex); \
217         swrap_mutex_unlock(&pcap_dump_mutex); \
218         swrap_mutex_unlock(&autobind_start_mutex); \
219         swrap_mutex_unlock(&sockets_si_global); \
220         swrap_mutex_unlock(&first_free_mutex); \
221         swrap_mutex_unlock(&socket_reset_mutex); \
222         swrap_mutex_unlock(&sockets_mutex); \
223 } while(0)
224
225 #define SOCKET_INFO_CONTAINER(si) \
226         (struct socket_info_container *)(si)
227
228 #define SWRAP_LOCK_SI(si) do { \
229         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
230         if (sic != NULL) { \
231                 swrap_mutex_lock(&sockets_si_global); \
232         } else { \
233                 abort(); \
234         } \
235 } while(0)
236
237 #define SWRAP_UNLOCK_SI(si) do { \
238         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
239         if (sic != NULL) { \
240                 swrap_mutex_unlock(&sockets_si_global); \
241         } else { \
242                 abort(); \
243         } \
244 } while(0)
245
246 #if defined(HAVE_GETTIMEOFDAY_TZ) || defined(HAVE_GETTIMEOFDAY_TZ_VOID)
247 #define swrapGetTimeOfDay(tval) gettimeofday(tval,NULL)
248 #else
249 #define swrapGetTimeOfDay(tval) gettimeofday(tval)
250 #endif
251
252 /* we need to use a very terse format here as IRIX 6.4 silently
253    truncates names to 16 chars, so if we use a longer name then we
254    can't tell which port a packet came from with recvfrom()
255
256    with this format we have 8 chars left for the directory name
257 */
258 #define SOCKET_FORMAT "%c%02X%04X"
259 #define SOCKET_TYPE_CHAR_TCP            'T'
260 #define SOCKET_TYPE_CHAR_UDP            'U'
261 #define SOCKET_TYPE_CHAR_TCP_V6         'X'
262 #define SOCKET_TYPE_CHAR_UDP_V6         'Y'
263
264 /*
265  * Set the packet MTU to 1500 bytes for stream sockets to make it easier to
266  * format PCAP capture files (as the caller will simply continue from here).
267  */
268 #define SOCKET_WRAPPER_MTU_DEFAULT 1500
269 #define SOCKET_WRAPPER_MTU_MIN     512
270 #define SOCKET_WRAPPER_MTU_MAX     32768
271
272 #define SOCKET_MAX_SOCKETS 1024
273
274 /*
275  * Maximum number of socket_info structures that can
276  * be used. Can be overridden by the environment variable
277  * SOCKET_WRAPPER_MAX_SOCKETS.
278  */
279 #define SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT 65535
280
281 #define SOCKET_WRAPPER_MAX_SOCKETS_LIMIT 262140
282
283 /* This limit is to avoid broadcast sendto() needing to stat too many
284  * files.  It may be raised (with a performance cost) to up to 254
285  * without changing the format above */
286 #define MAX_WRAPPED_INTERFACES 64
287
288 struct swrap_address {
289         socklen_t sa_socklen;
290         union {
291                 struct sockaddr s;
292                 struct sockaddr_in in;
293 #ifdef HAVE_IPV6
294                 struct sockaddr_in6 in6;
295 #endif
296                 struct sockaddr_un un;
297                 struct sockaddr_storage ss;
298         } sa;
299 };
300
301 static int first_free;
302
303 struct socket_info
304 {
305         /*
306          * Remember to update swrap_unix_scm_right_magic
307          * on any change.
308          */
309
310         int family;
311         int type;
312         int protocol;
313         int bound;
314         int bcast;
315         int is_server;
316         int connected;
317         int defer_connect;
318         int pktinfo;
319         int tcp_nodelay;
320         int listening;
321         int fd_passed;
322
323         /* The unix path so we can unlink it on close() */
324         struct sockaddr_un un_addr;
325
326         struct swrap_address bindname;
327         struct swrap_address myname;
328         struct swrap_address peername;
329
330         struct {
331                 unsigned long pck_snd;
332                 unsigned long pck_rcv;
333         } io;
334 };
335
336 struct socket_info_meta
337 {
338         unsigned int refcount;
339         int next_free;
340         /*
341          * As long as we don't use shared memory
342          * for the sockets array, we use
343          * sockets_si_global as a single mutex.
344          *
345          * pthread_mutex_t mutex;
346          */
347 };
348
349 struct socket_info_container
350 {
351         struct socket_info info;
352         struct socket_info_meta meta;
353 };
354
355 static struct socket_info_container *sockets;
356
357 static size_t socket_info_max = 0;
358
359 /*
360  * Allocate the socket array always on the limit value. We want it to be
361  * at least bigger than the default so if we reach the limit we can
362  * still deal with duplicate fds pointing to the same socket_info.
363  */
364 static size_t socket_fds_max = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
365
366 /* Hash table to map fds to corresponding socket_info index */
367 static int *socket_fds_idx;
368
369 /* Mutex for synchronizing port selection during swrap_auto_bind() */
370 static pthread_mutex_t autobind_start_mutex = PTHREAD_MUTEX_INITIALIZER;
371
372 /* Mutex to guard the initialization of array of socket_info structures */
373 static pthread_mutex_t sockets_mutex = PTHREAD_MUTEX_INITIALIZER;
374
375 /* Mutex to guard the socket reset in swrap_remove_wrapper() */
376 static pthread_mutex_t socket_reset_mutex = PTHREAD_MUTEX_INITIALIZER;
377
378 /* Mutex to synchronize access to first free index in socket_info array */
379 static pthread_mutex_t first_free_mutex = PTHREAD_MUTEX_INITIALIZER;
380
381 /*
382  * Mutex to synchronize access to socket_info structures
383  * We use a single global mutex in order to avoid leaking
384  * ~ 38M copy on write memory per fork.
385  * max_sockets=65535 * sizeof(struct socket_info_container)=592 = 38796720
386  */
387 static pthread_mutex_t sockets_si_global = PTHREAD_MUTEX_INITIALIZER;
388
389 /* Mutex to synchronize access to packet capture dump file */
390 static pthread_mutex_t pcap_dump_mutex = PTHREAD_MUTEX_INITIALIZER;
391
392 /* Mutex for synchronizing mtu value fetch*/
393 static pthread_mutex_t mtu_update_mutex = PTHREAD_MUTEX_INITIALIZER;
394
395 /* Function prototypes */
396
397 #if ! defined(HAVE_CONSTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_INIT)
398 /* xlC and other oldschool compilers support (only) this */
399 #pragma init (swrap_constructor)
400 #endif
401 void swrap_constructor(void) CONSTRUCTOR_ATTRIBUTE;
402 #if ! defined(HAVE_DESTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_FINI)
403 #pragma fini (swrap_destructor)
404 #endif
405 void swrap_destructor(void) DESTRUCTOR_ATTRIBUTE;
406
407 #ifndef HAVE_GETPROGNAME
/*
 * Fallback for platforms without getprogname().
 *
 * Returns the program name from whichever facility the build detected,
 * or NULL when none of them is available.
 */
static const char *getprogname(void)
{
        const char *name = NULL;

#if defined(HAVE_PROGRAM_INVOCATION_SHORT_NAME)
        name = program_invocation_short_name;
#elif defined(HAVE_GETEXECNAME)
        name = getexecname();
#endif /* HAVE_PROGRAM_INVOCATION_SHORT_NAME */

        return name;
}
418 #endif /* HAVE_GETPROGNAME */
419
420 static void swrap_log(enum swrap_dbglvl_e dbglvl, const char *func, const char *format, ...) PRINTF_ATTRIBUTE(3, 4);
421 # define SWRAP_LOG(dbglvl, ...) swrap_log((dbglvl), __func__, __VA_ARGS__)
422
423 static void swrap_log(enum swrap_dbglvl_e dbglvl,
424                       const char *func,
425                       const char *format, ...)
426 {
427         char buffer[1024];
428         va_list va;
429         const char *d;
430         unsigned int lvl = 0;
431         const char *prefix = "SWRAP";
432         const char *progname = getprogname();
433
434         d = getenv("SOCKET_WRAPPER_DEBUGLEVEL");
435         if (d != NULL) {
436                 lvl = atoi(d);
437         }
438
439         if (lvl < dbglvl) {
440                 return;
441         }
442
443         va_start(va, format);
444         vsnprintf(buffer, sizeof(buffer), format, va);
445         va_end(va);
446
447         switch (dbglvl) {
448                 case SWRAP_LOG_ERROR:
449                         prefix = "SWRAP_ERROR";
450                         break;
451                 case SWRAP_LOG_WARN:
452                         prefix = "SWRAP_WARN";
453                         break;
454                 case SWRAP_LOG_DEBUG:
455                         prefix = "SWRAP_DEBUG";
456                         break;
457                 case SWRAP_LOG_TRACE:
458                         prefix = "SWRAP_TRACE";
459                         break;
460         }
461
462         if (progname == NULL) {
463                 progname = "<unknown>";
464         }
465
466         fprintf(stderr,
467                 "%s[%s (%u)] - %s: %s\n",
468                 prefix,
469                 progname,
470                 (unsigned int)getpid(),
471                 func,
472                 buffer);
473 }
474
475 /*********************************************************
476  * SWRAP LOADING LIBC FUNCTIONS
477  *********************************************************/
478
479 #include <dlfcn.h>
480
/*
 * Function pointer types for the real libc/libsocket entry points.
 *
 * Each type has to match the prototype of the function it points to:
 * calling a function through a pointer of an incompatible type is
 * undefined behaviour. In particular the read/recv/send families return
 * ssize_t (not int) and recvmsg() fills in the msghdr it is given, so
 * that parameter is not const.
 */
#ifdef HAVE_ACCEPT4
typedef int (*__libc_accept4)(int sockfd,
                              struct sockaddr *addr,
                              socklen_t *addrlen,
                              int flags);
#else
typedef int (*__libc_accept)(int sockfd,
                             struct sockaddr *addr,
                             socklen_t *addrlen);
#endif
typedef int (*__libc_bind)(int sockfd,
                           const struct sockaddr *addr,
                           socklen_t addrlen);
typedef int (*__libc_close)(int fd);
#ifdef HAVE___CLOSE_NOCANCEL
typedef int (*__libc___close_nocancel)(int fd);
#endif
typedef int (*__libc_connect)(int sockfd,
                              const struct sockaddr *addr,
                              socklen_t addrlen);
typedef int (*__libc_dup)(int fd);
typedef int (*__libc_dup2)(int oldfd, int newfd);
typedef int (*__libc_fcntl)(int fd, int cmd, ...);
typedef FILE *(*__libc_fopen)(const char *name, const char *mode);
#ifdef HAVE_FOPEN64
typedef FILE *(*__libc_fopen64)(const char *name, const char *mode);
#endif
#ifdef HAVE_EVENTFD
typedef int (*__libc_eventfd)(int count, int flags);
#endif
typedef int (*__libc_getpeername)(int sockfd,
                                  struct sockaddr *addr,
                                  socklen_t *addrlen);
typedef int (*__libc_getsockname)(int sockfd,
                                  struct sockaddr *addr,
                                  socklen_t *addrlen);
typedef int (*__libc_getsockopt)(int sockfd,
                               int level,
                               int optname,
                               void *optval,
                               socklen_t *optlen);
typedef int (*__libc_ioctl)(int d, unsigned long int request, ...);
typedef int (*__libc_listen)(int sockfd, int backlog);
typedef int (*__libc_open)(const char *pathname, int flags, ...);
#ifdef HAVE_OPEN64
typedef int (*__libc_open64)(const char *pathname, int flags, ...);
#endif /* HAVE_OPEN64 */
typedef int (*__libc_openat)(int dirfd, const char *path, int flags, ...);
typedef int (*__libc_pipe)(int pipefd[2]);
typedef ssize_t (*__libc_read)(int fd, void *buf, size_t count);
typedef ssize_t (*__libc_readv)(int fd, const struct iovec *iov, int iovcnt);
typedef ssize_t (*__libc_recv)(int sockfd, void *buf, size_t len, int flags);
typedef ssize_t (*__libc_recvfrom)(int sockfd,
                                   void *buf,
                                   size_t len,
                                   int flags,
                                   struct sockaddr *src_addr,
                                   socklen_t *addrlen);
typedef ssize_t (*__libc_recvmsg)(int sockfd, struct msghdr *msg, int flags);
typedef ssize_t (*__libc_send)(int sockfd, const void *buf, size_t len, int flags);
typedef ssize_t (*__libc_sendmsg)(int sockfd, const struct msghdr *msg, int flags);
typedef ssize_t (*__libc_sendto)(int sockfd,
                                 const void *buf,
                                 size_t len,
                                 int flags,
                                 const struct sockaddr *dst_addr,
                                 socklen_t addrlen);
typedef int (*__libc_setsockopt)(int sockfd,
                               int level,
                               int optname,
                               const void *optval,
                               socklen_t optlen);
#ifdef HAVE_SIGNALFD
typedef int (*__libc_signalfd)(int fd, const sigset_t *mask, int flags);
#endif
typedef int (*__libc_socket)(int domain, int type, int protocol);
typedef int (*__libc_socketpair)(int domain, int type, int protocol, int sv[2]);
#ifdef HAVE_TIMERFD_CREATE
typedef int (*__libc_timerfd_create)(int clockid, int flags);
#endif
typedef ssize_t (*__libc_write)(int fd, const void *buf, size_t count);
typedef ssize_t (*__libc_writev)(int fd, const struct iovec *iov, int iovcnt);
563
/*
 * Declare the storage slot _libc_<i> for one wrapped function.
 *
 * The slot is a union of the typed function pointer (f) and a plain
 * object pointer (obj): the address returned by _swrap_bind_symbol() is
 * stored through obj and the function is later called through f.
 */
564 #define SWRAP_SYMBOL_ENTRY(i) \
565         union { \
566                 __libc_##i f; \
567                 void *obj; \
568         } _libc_##i
569
570 struct swrap_libc_symbols {
571 #ifdef HAVE_ACCEPT4
572         SWRAP_SYMBOL_ENTRY(accept4);
573 #else
574         SWRAP_SYMBOL_ENTRY(accept);
575 #endif
576         SWRAP_SYMBOL_ENTRY(bind);
577         SWRAP_SYMBOL_ENTRY(close);
578 #ifdef HAVE___CLOSE_NOCANCEL
579         SWRAP_SYMBOL_ENTRY(__close_nocancel);
580 #endif
581         SWRAP_SYMBOL_ENTRY(connect);
582         SWRAP_SYMBOL_ENTRY(dup);
583         SWRAP_SYMBOL_ENTRY(dup2);
584         SWRAP_SYMBOL_ENTRY(fcntl);
585         SWRAP_SYMBOL_ENTRY(fopen);
586 #ifdef HAVE_FOPEN64
587         SWRAP_SYMBOL_ENTRY(fopen64);
588 #endif
589 #ifdef HAVE_EVENTFD
590         SWRAP_SYMBOL_ENTRY(eventfd);
591 #endif
592         SWRAP_SYMBOL_ENTRY(getpeername);
593         SWRAP_SYMBOL_ENTRY(getsockname);
594         SWRAP_SYMBOL_ENTRY(getsockopt);
595         SWRAP_SYMBOL_ENTRY(ioctl);
596         SWRAP_SYMBOL_ENTRY(listen);
597         SWRAP_SYMBOL_ENTRY(open);
598 #ifdef HAVE_OPEN64
599         SWRAP_SYMBOL_ENTRY(open64);
600 #endif
601         SWRAP_SYMBOL_ENTRY(openat);
602         SWRAP_SYMBOL_ENTRY(pipe);
603         SWRAP_SYMBOL_ENTRY(read);
604         SWRAP_SYMBOL_ENTRY(readv);
605         SWRAP_SYMBOL_ENTRY(recv);
606         SWRAP_SYMBOL_ENTRY(recvfrom);
607         SWRAP_SYMBOL_ENTRY(recvmsg);
608         SWRAP_SYMBOL_ENTRY(send);
609         SWRAP_SYMBOL_ENTRY(sendmsg);
610         SWRAP_SYMBOL_ENTRY(sendto);
611         SWRAP_SYMBOL_ENTRY(setsockopt);
612 #ifdef HAVE_SIGNALFD
613         SWRAP_SYMBOL_ENTRY(signalfd);
614 #endif
615         SWRAP_SYMBOL_ENTRY(socket);
616         SWRAP_SYMBOL_ENTRY(socketpair);
617 #ifdef HAVE_TIMERFD_CREATE
618         SWRAP_SYMBOL_ENTRY(timerfd_create);
619 #endif
620         SWRAP_SYMBOL_ENTRY(write);
621         SWRAP_SYMBOL_ENTRY(writev);
622 };
623
624 struct swrap {
625         struct {
626                 void *handle;
627                 void *socket_handle;
628                 struct swrap_libc_symbols symbols;
629         } libc;
630 };
631
632 static struct swrap swrap;
633
634 /* prototypes */
635 static char *socket_wrapper_dir(void);
636
637 #define LIBC_NAME "libc.so"
638
/* Libraries the wrapped symbols can be loaded from. */
enum swrap_lib {
    SWRAP_LIBC,
    SWRAP_LIBSOCKET,
};

/*
 * Return a short, static, human readable name for a library identifier.
 * Values outside of enum swrap_lib yield "unknown".
 */
static const char *swrap_str_lib(enum swrap_lib lib)
{
        /* Default for values the switch below does not know about. */
        const char *lib_name = "unknown";

        /*
         * No default label on purpose: the compiler can then warn about
         * enum values that are not handled here.
         */
        switch (lib) {
        case SWRAP_LIBC:
                lib_name = "libc";
                break;
        case SWRAP_LIBSOCKET:
                lib_name = "libsocket";
                break;
        }

        return lib_name;
}
656
657 static void *swrap_load_lib_handle(enum swrap_lib lib)
658 {
659         int flags = RTLD_LAZY;
660         void *handle = NULL;
661         int i;
662
663 #ifdef RTLD_DEEPBIND
664         const char *env_preload = getenv("LD_PRELOAD");
665         const char *env_deepbind = getenv("SOCKET_WRAPPER_DISABLE_DEEPBIND");
666         bool enable_deepbind = true;
667
668         /* Don't do a deepbind if we run with libasan */
669         if (env_preload != NULL && strlen(env_preload) < 1024) {
670                 const char *p = strstr(env_preload, "libasan.so");
671                 if (p != NULL) {
672                         enable_deepbind = false;
673                 }
674         }
675
676         if (env_deepbind != NULL && strlen(env_deepbind) >= 1) {
677                 enable_deepbind = false;
678         }
679
680         if (enable_deepbind) {
681                 flags |= RTLD_DEEPBIND;
682         }
683 #endif
684
685         switch (lib) {
686         case SWRAP_LIBSOCKET:
687 #ifdef HAVE_LIBSOCKET
688                 handle = swrap.libc.socket_handle;
689                 if (handle == NULL) {
690                         for (i = 10; i >= 0; i--) {
691                                 char soname[256] = {0};
692
693                                 snprintf(soname, sizeof(soname), "libsocket.so.%d", i);
694                                 handle = dlopen(soname, flags);
695                                 if (handle != NULL) {
696                                         break;
697                                 }
698                         }
699
700                         swrap.libc.socket_handle = handle;
701                 }
702                 break;
703 #endif
704         case SWRAP_LIBC:
705                 handle = swrap.libc.handle;
706 #ifdef LIBC_SO
707                 if (handle == NULL) {
708                         handle = dlopen(LIBC_SO, flags);
709
710                         swrap.libc.handle = handle;
711                 }
712 #endif
713                 if (handle == NULL) {
714                         for (i = 10; i >= 0; i--) {
715                                 char soname[256] = {0};
716
717                                 snprintf(soname, sizeof(soname), "libc.so.%d", i);
718                                 handle = dlopen(soname, flags);
719                                 if (handle != NULL) {
720                                         break;
721                                 }
722                         }
723
724                         swrap.libc.handle = handle;
725                 }
726                 break;
727         }
728
729         if (handle == NULL) {
730 #ifdef RTLD_NEXT
731                 handle = swrap.libc.handle = swrap.libc.socket_handle = RTLD_NEXT;
732 #else
733                 SWRAP_LOG(SWRAP_LOG_ERROR,
734                           "Failed to dlopen library: %s",
735                           dlerror());
736                 exit(-1);
737 #endif
738         }
739
740         return handle;
741 }
742
743 static void *_swrap_bind_symbol(enum swrap_lib lib, const char *fn_name)
744 {
745         void *handle;
746         void *func;
747
748         handle = swrap_load_lib_handle(lib);
749
750         func = dlsym(handle, fn_name);
751         if (func == NULL) {
752                 SWRAP_LOG(SWRAP_LOG_ERROR,
753                           "Failed to find %s: %s",
754                           fn_name,
755                           dlerror());
756                 exit(-1);
757         }
758
759         SWRAP_LOG(SWRAP_LOG_TRACE,
760                   "Loaded %s from %s",
761                   fn_name,
762                   swrap_str_lib(lib));
763
764         return func;
765 }
766
767 #define swrap_mutex_lock(m) _swrap_mutex_lock(m, #m, __func__, __LINE__)
768 static void _swrap_mutex_lock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
769 {
770         int ret;
771
772         ret = pthread_mutex_lock(mutex);
773         if (ret != 0) {
774                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't lock pthread mutex(%s) - %s",
775                           getpid(), getppid(), caller, line, name, strerror(ret));
776                 abort();
777         }
778 }
779
780 #define swrap_mutex_unlock(m) _swrap_mutex_unlock(m, #m, __func__, __LINE__)
781 static void _swrap_mutex_unlock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
782 {
783         int ret;
784
785         ret = pthread_mutex_unlock(mutex);
786         if (ret != 0) {
787                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't unlock pthread mutex(%s) - %s",
788                           getpid(), getppid(), caller, line, name, strerror(ret));
789                 abort();
790         }
791 }
792
793 /*
794  * These macros have a thread race condition on purpose!
795  *
796  * This is an optimization to avoid locking each time we check if the symbol is
797  * bound.
798  */
/*
 * Resolve sym_name from the given library and store its address in the
 * matching swrap.libc.symbols._libc_<sym_name> slot.
 *
 * The expansion intentionally does NOT end in a semicolon: the caller
 * supplies it, so that the macro behaves like a single statement and
 * can be used safely in an unbraced if/else.
 */
#define _swrap_bind_symbol_generic(lib, sym_name) do { \
        swrap.libc.symbols._libc_##sym_name.obj = \
                _swrap_bind_symbol(lib, #sym_name); \
} while(0)

/* Bind sym_name from libc. */
#define swrap_bind_symbol_libc(sym_name) \
        _swrap_bind_symbol_generic(SWRAP_LIBC, sym_name)

/* Bind sym_name from libsocket. */
#define swrap_bind_symbol_libsocket(sym_name) \
        _swrap_bind_symbol_generic(SWRAP_LIBSOCKET, sym_name)
809
810 static void swrap_bind_symbol_all(void);
811
812 /****************************************************************************
813  *                               IMPORTANT
814  ****************************************************************************
815  *
816  * Functions especially from libc need to be loaded individually, you can't
817  * load all at once or gdb will segfault at startup. The same applies to
818  * valgrind and has probably something to do with the linker.  So we need
819  * load each function at the point it is called the first time.
820  *
821  ****************************************************************************/
822
823 #ifdef HAVE_ACCEPT4
824 static int libc_accept4(int sockfd,
825                         struct sockaddr *addr,
826                         socklen_t *addrlen,
827                         int flags)
828 {
829         swrap_bind_symbol_all();
830
831         return swrap.libc.symbols._libc_accept4.f(sockfd, addr, addrlen, flags);
832 }
833
834 #else /* HAVE_ACCEPT4 */
835
836 static int libc_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
837 {
838         swrap_bind_symbol_all();
839
840         return swrap.libc.symbols._libc_accept.f(sockfd, addr, addrlen);
841 }
842 #endif /* HAVE_ACCEPT4 */
843
844 static int libc_bind(int sockfd,
845                      const struct sockaddr *addr,
846                      socklen_t addrlen)
847 {
848         swrap_bind_symbol_all();
849
850         return swrap.libc.symbols._libc_bind.f(sockfd, addr, addrlen);
851 }
852
853 static int libc_close(int fd)
854 {
855         swrap_bind_symbol_all();
856
857         return swrap.libc.symbols._libc_close.f(fd);
858 }
859
860 #ifdef HAVE___CLOSE_NOCANCEL
861 static int libc___close_nocancel(int fd)
862 {
863         swrap_bind_symbol_all();
864
865         return swrap.libc.symbols._libc___close_nocancel.f(fd);
866 }
867 #endif /* HAVE___CLOSE_NOCANCEL */
868
869 static int libc_connect(int sockfd,
870                         const struct sockaddr *addr,
871                         socklen_t addrlen)
872 {
873         swrap_bind_symbol_all();
874
875         return swrap.libc.symbols._libc_connect.f(sockfd, addr, addrlen);
876 }
877
878 static int libc_dup(int fd)
879 {
880         swrap_bind_symbol_all();
881
882         return swrap.libc.symbols._libc_dup.f(fd);
883 }
884
885 static int libc_dup2(int oldfd, int newfd)
886 {
887         swrap_bind_symbol_all();
888
889         return swrap.libc.symbols._libc_dup2.f(oldfd, newfd);
890 }
891
892 #ifdef HAVE_EVENTFD
893 static int libc_eventfd(int count, int flags)
894 {
895         swrap_bind_symbol_all();
896
897         return swrap.libc.symbols._libc_eventfd.f(count, flags);
898 }
899 #endif
900
901 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
902 static int libc_vfcntl(int fd, int cmd, va_list ap)
903 {
904         void *arg;
905         int rc;
906
907         swrap_bind_symbol_all();
908
909         arg = va_arg(ap, void *);
910
911         rc = swrap.libc.symbols._libc_fcntl.f(fd, cmd, arg);
912
913         return rc;
914 }
915
916 static int libc_getpeername(int sockfd,
917                             struct sockaddr *addr,
918                             socklen_t *addrlen)
919 {
920         swrap_bind_symbol_all();
921
922         return swrap.libc.symbols._libc_getpeername.f(sockfd, addr, addrlen);
923 }
924
925 static int libc_getsockname(int sockfd,
926                             struct sockaddr *addr,
927                             socklen_t *addrlen)
928 {
929         swrap_bind_symbol_all();
930
931         return swrap.libc.symbols._libc_getsockname.f(sockfd, addr, addrlen);
932 }
933
934 static int libc_getsockopt(int sockfd,
935                            int level,
936                            int optname,
937                            void *optval,
938                            socklen_t *optlen)
939 {
940         swrap_bind_symbol_all();
941
942         return swrap.libc.symbols._libc_getsockopt.f(sockfd,
943                                                      level,
944                                                      optname,
945                                                      optval,
946                                                      optlen);
947 }
948
949 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
950 static int libc_vioctl(int d, unsigned long int request, va_list ap)
951 {
952         void *arg;
953         int rc;
954
955         swrap_bind_symbol_all();
956
957         arg = va_arg(ap, void *);
958
959         rc = swrap.libc.symbols._libc_ioctl.f(d, request, arg);
960
961         return rc;
962 }
963
964 static int libc_listen(int sockfd, int backlog)
965 {
966         swrap_bind_symbol_all();
967
968         return swrap.libc.symbols._libc_listen.f(sockfd, backlog);
969 }
970
971 static FILE *libc_fopen(const char *name, const char *mode)
972 {
973         swrap_bind_symbol_all();
974
975         return swrap.libc.symbols._libc_fopen.f(name, mode);
976 }
977
978 #ifdef HAVE_FOPEN64
979 static FILE *libc_fopen64(const char *name, const char *mode)
980 {
981         swrap_bind_symbol_all();
982
983         return swrap.libc.symbols._libc_fopen64.f(name, mode);
984 }
985 #endif /* HAVE_FOPEN64 */
986
987 static int libc_vopen(const char *pathname, int flags, va_list ap)
988 {
989         int mode = 0;
990         int fd;
991
992         swrap_bind_symbol_all();
993
994         if (flags & O_CREAT) {
995                 mode = va_arg(ap, int);
996         }
997         fd = swrap.libc.symbols._libc_open.f(pathname, flags, (mode_t)mode);
998
999         return fd;
1000 }
1001
/*
 * Variadic front end for libc_vopen(): collects the optional arguments
 * following flags and hands them over as a va_list.
 */
static int libc_open(const char *pathname, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = libc_vopen(pathname, flags, args);
        va_end(args);

        return result;
}
1013
1014 #ifdef HAVE_OPEN64
1015 static int libc_vopen64(const char *pathname, int flags, va_list ap)
1016 {
1017         int mode = 0;
1018         int fd;
1019
1020         swrap_bind_symbol_all();
1021
1022         if (flags & O_CREAT) {
1023                 mode = va_arg(ap, int);
1024         }
1025         fd = swrap.libc.symbols._libc_open64.f(pathname, flags, (mode_t)mode);
1026
1027         return fd;
1028 }
1029 #endif /* HAVE_OPEN64 */
1030
1031 static int libc_vopenat(int dirfd, const char *path, int flags, va_list ap)
1032 {
1033         int mode = 0;
1034         int fd;
1035
1036         swrap_bind_symbol_all();
1037
1038         if (flags & O_CREAT) {
1039                 mode = va_arg(ap, int);
1040         }
1041         fd = swrap.libc.symbols._libc_openat.f(dirfd,
1042                                                path,
1043                                                flags,
1044                                                (mode_t)mode);
1045
1046         return fd;
1047 }
1048
/*
 * Variadic front end for libc_vopenat(): collects the optional arguments
 * following flags and hands them over as a va_list.
 *
 * Currently compiled out by the surrounding "#if 0".
 */
#if 0
static int libc_openat(int dirfd, const char *path, int flags, ...)
{
        va_list ap;
        int fd;

        va_start(ap, flags);
        fd = libc_vopenat(dirfd, path, flags, ap);
        va_end(ap);

        return fd;
}
#endif
1062
1063 static int libc_pipe(int pipefd[2])
1064 {
1065         swrap_bind_symbol_all();
1066
1067         return swrap.libc.symbols._libc_pipe.f(pipefd);
1068 }
1069
1070 static int libc_read(int fd, void *buf, size_t count)
1071 {
1072         swrap_bind_symbol_all();
1073
1074         return swrap.libc.symbols._libc_read.f(fd, buf, count);
1075 }
1076
1077 static ssize_t libc_readv(int fd, const struct iovec *iov, int iovcnt)
1078 {
1079         swrap_bind_symbol_all();
1080
1081         return swrap.libc.symbols._libc_readv.f(fd, iov, iovcnt);
1082 }
1083
1084 static int libc_recv(int sockfd, void *buf, size_t len, int flags)
1085 {
1086         swrap_bind_symbol_all();
1087
1088         return swrap.libc.symbols._libc_recv.f(sockfd, buf, len, flags);
1089 }
1090
1091 static int libc_recvfrom(int sockfd,
1092                          void *buf,
1093                          size_t len,
1094                          int flags,
1095                          struct sockaddr *src_addr,
1096                          socklen_t *addrlen)
1097 {
1098         swrap_bind_symbol_all();
1099
1100         return swrap.libc.symbols._libc_recvfrom.f(sockfd,
1101                                                    buf,
1102                                                    len,
1103                                                    flags,
1104                                                    src_addr,
1105                                                    addrlen);
1106 }
1107
1108 static int libc_recvmsg(int sockfd, struct msghdr *msg, int flags)
1109 {
1110         swrap_bind_symbol_all();
1111
1112         return swrap.libc.symbols._libc_recvmsg.f(sockfd, msg, flags);
1113 }
1114
1115 static int libc_send(int sockfd, const void *buf, size_t len, int flags)
1116 {
1117         swrap_bind_symbol_all();
1118
1119         return swrap.libc.symbols._libc_send.f(sockfd, buf, len, flags);
1120 }
1121
1122 static int libc_sendmsg(int sockfd, const struct msghdr *msg, int flags)
1123 {
1124         swrap_bind_symbol_all();
1125
1126         return swrap.libc.symbols._libc_sendmsg.f(sockfd, msg, flags);
1127 }
1128
1129 static int libc_sendto(int sockfd,
1130                        const void *buf,
1131                        size_t len,
1132                        int flags,
1133                        const  struct sockaddr *dst_addr,
1134                        socklen_t addrlen)
1135 {
1136         swrap_bind_symbol_all();
1137
1138         return swrap.libc.symbols._libc_sendto.f(sockfd,
1139                                                  buf,
1140                                                  len,
1141                                                  flags,
1142                                                  dst_addr,
1143                                                  addrlen);
1144 }
1145
1146 static int libc_setsockopt(int sockfd,
1147                            int level,
1148                            int optname,
1149                            const void *optval,
1150                            socklen_t optlen)
1151 {
1152         swrap_bind_symbol_all();
1153
1154         return swrap.libc.symbols._libc_setsockopt.f(sockfd,
1155                                                      level,
1156                                                      optname,
1157                                                      optval,
1158                                                      optlen);
1159 }
1160
1161 #ifdef HAVE_SIGNALFD
1162 static int libc_signalfd(int fd, const sigset_t *mask, int flags)
1163 {
1164         swrap_bind_symbol_all();
1165
1166         return swrap.libc.symbols._libc_signalfd.f(fd, mask, flags);
1167 }
1168 #endif
1169
1170 static int libc_socket(int domain, int type, int protocol)
1171 {
1172         swrap_bind_symbol_all();
1173
1174         return swrap.libc.symbols._libc_socket.f(domain, type, protocol);
1175 }
1176
1177 static int libc_socketpair(int domain, int type, int protocol, int sv[2])
1178 {
1179         swrap_bind_symbol_all();
1180
1181         return swrap.libc.symbols._libc_socketpair.f(domain, type, protocol, sv);
1182 }
1183
1184 #ifdef HAVE_TIMERFD_CREATE
1185 static int libc_timerfd_create(int clockid, int flags)
1186 {
1187         swrap_bind_symbol_all();
1188
1189         return swrap.libc.symbols._libc_timerfd_create.f(clockid, flags);
1190 }
1191 #endif
1192
1193 static ssize_t libc_write(int fd, const void *buf, size_t count)
1194 {
1195         swrap_bind_symbol_all();
1196
1197         return swrap.libc.symbols._libc_write.f(fd, buf, count);
1198 }
1199
1200 static ssize_t libc_writev(int fd, const struct iovec *iov, int iovcnt)
1201 {
1202         swrap_bind_symbol_all();
1203
1204         return swrap.libc.symbols._libc_writev.f(fd, iov, iovcnt);
1205 }
1206
/*
 * Bind every real function the libc_*() helpers dispatch to.
 *
 * Each symbol is bound through swrap_bind_symbol_libc() or
 * swrap_bind_symbol_libsocket(). Optional symbols are only bound when the
 * matching HAVE_* macro is defined. This function is the init routine
 * handed to pthread_once() by swrap_bind_symbol_all().
 *
 * DO NOT call this function during library initialization!
 */
static void __swrap_bind_symbol_all_once(void)
{
        /* Only one of accept4()/accept() is bound, chosen by HAVE_ACCEPT4. */
#ifdef HAVE_ACCEPT4
        swrap_bind_symbol_libsocket(accept4);
#else
        swrap_bind_symbol_libsocket(accept);
#endif
        swrap_bind_symbol_libsocket(bind);
        swrap_bind_symbol_libc(close);
#ifdef HAVE___CLOSE_NOCANCEL
        swrap_bind_symbol_libc(__close_nocancel);
#endif
        swrap_bind_symbol_libsocket(connect);
        swrap_bind_symbol_libc(dup);
        swrap_bind_symbol_libc(dup2);
        swrap_bind_symbol_libc(fcntl);
        swrap_bind_symbol_libc(fopen);
#ifdef HAVE_FOPEN64
        swrap_bind_symbol_libc(fopen64);
#endif
#ifdef HAVE_EVENTFD
        swrap_bind_symbol_libc(eventfd);
#endif
        swrap_bind_symbol_libsocket(getpeername);
        swrap_bind_symbol_libsocket(getsockname);
        swrap_bind_symbol_libsocket(getsockopt);
        swrap_bind_symbol_libc(ioctl);
        swrap_bind_symbol_libsocket(listen);
        swrap_bind_symbol_libc(open);
#ifdef HAVE_OPEN64
        swrap_bind_symbol_libc(open64);
#endif
        swrap_bind_symbol_libc(openat);
        swrap_bind_symbol_libsocket(pipe);
        swrap_bind_symbol_libc(read);
        swrap_bind_symbol_libsocket(readv);
        swrap_bind_symbol_libsocket(recv);
        swrap_bind_symbol_libsocket(recvfrom);
        swrap_bind_symbol_libsocket(recvmsg);
        swrap_bind_symbol_libsocket(send);
        swrap_bind_symbol_libsocket(sendmsg);
        swrap_bind_symbol_libsocket(sendto);
        swrap_bind_symbol_libsocket(setsockopt);
#ifdef HAVE_SIGNALFD
        swrap_bind_symbol_libsocket(signalfd);
#endif
        swrap_bind_symbol_libsocket(socket);
        swrap_bind_symbol_libsocket(socketpair);
#ifdef HAVE_TIMERFD_CREATE
        swrap_bind_symbol_libc(timerfd_create);
#endif
        swrap_bind_symbol_libc(write);
        swrap_bind_symbol_libsocket(writev);
}
1262
1263 static void swrap_bind_symbol_all(void)
1264 {
1265         static pthread_once_t all_symbol_binding_once = PTHREAD_ONCE_INIT;
1266
1267         pthread_once(&all_symbol_binding_once, __swrap_bind_symbol_all_once);
1268 }
1269
1270 /*********************************************************
1271  * SWRAP HELPER FUNCTIONS
1272  *********************************************************/
1273
1274 /*
1275  * We return 127.0.0.0 (default) or 10.53.57.0.
1276  *
1277  * This can be controlled by:
1278  * SOCKET_WRAPPER_IPV4_NETWORK=127.0.0.0 (default)
1279  * or
1280  * SOCKET_WRAPPER_IPV4_NETWORK=10.53.57.0
1281  */
1282 static in_addr_t swrap_ipv4_net(void)
1283 {
1284         static int initialized;
1285         static in_addr_t hv;
1286         const char *net_str = NULL;
1287         struct in_addr nv;
1288         int ret;
1289
1290         if (initialized) {
1291                 return hv;
1292         }
1293         initialized = 1;
1294
1295         net_str = getenv("SOCKET_WRAPPER_IPV4_NETWORK");
1296         if (net_str == NULL) {
1297                 net_str = "127.0.0.0";
1298         }
1299
1300         ret = inet_pton(AF_INET, net_str, &nv);
1301         if (ret <= 0) {
1302                 SWRAP_LOG(SWRAP_LOG_ERROR,
1303                           "INVALID IPv4 Network [%s]",
1304                           net_str);
1305                 abort();
1306         }
1307
1308         hv = ntohl(nv.s_addr);
1309
1310         switch (hv) {
1311         case 0x7f000000:
1312                 /* 127.0.0.0 */
1313                 break;
1314         case 0x0a353900:
1315                 /* 10.53.57.0 */
1316                 break;
1317         default:
1318                 SWRAP_LOG(SWRAP_LOG_ERROR,
1319                           "INVALID IPv4 Network [%s][0x%x] should be "
1320                           "127.0.0.0 or 10.53.57.0",
1321                           net_str, (unsigned)hv);
1322                 abort();
1323         }
1324
1325         return hv;
1326 }
1327
/*
 * Return the broadcast address of the wrapper network in host byte order:
 * 127.255.255.255 or 10.255.255.255.
 */
static in_addr_t swrap_ipv4_bcast(void)
{
        /* Set all class A host bits on top of the configured network. */
        return swrap_ipv4_net() | IN_CLASSA_HOST;
}
1340
1341 /*
1342  * This returns 127.0.0.${iface} or 10.53.57.${iface}
1343  */
1344 static in_addr_t swrap_ipv4_iface(unsigned int iface)
1345 {
1346         in_addr_t hv;
1347
1348         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1349                 SWRAP_LOG(SWRAP_LOG_ERROR,
1350                           "swrap_ipv4_iface(%u) invalid!",
1351                           iface);
1352                 abort();
1353                 return -1;
1354         }
1355
1356         hv = swrap_ipv4_net();
1357         hv |= iface;
1358
1359         return hv;
1360 }
1361
1362 #ifdef HAVE_IPV6
/*
 * Return a pointer to the wrapper's IPv6 base address FD00::5357:5F00
 * (FD00::5357:5FXX with the last byte left as zero).
 *
 * The address is parsed on the first call and kept in a static; later
 * calls return the same pointer. A parse failure aborts the process.
 */
static const struct in6_addr *swrap_ipv6(void)
{
        static struct in6_addr base_addr;
        static int cached;

        if (!cached) {
                cached = 1;

                if (inet_pton(AF_INET6, "FD00::5357:5F00", &base_addr) <= 0) {
                        abort();
                }
        }

        return &base_addr;
}
1384 #endif
1385
1386 static void set_port(int family, int prt, struct swrap_address *addr)
1387 {
1388         switch (family) {
1389         case AF_INET:
1390                 addr->sa.in.sin_port = htons(prt);
1391                 break;
1392 #ifdef HAVE_IPV6
1393         case AF_INET6:
1394                 addr->sa.in6.sin6_port = htons(prt);
1395                 break;
1396 #endif
1397         }
1398 }
1399
/*
 * Return the size of the sockaddr structure matching family.
 *
 * AF_INET maps to struct sockaddr_in and, when built with HAVE_IPV6,
 * AF_INET6 maps to struct sockaddr_in6. Any other family yields 0.
 */
static size_t socket_length(int family)
{
        size_t len = 0;

        if (family == AF_INET) {
                len = sizeof(struct sockaddr_in);
        }
#ifdef HAVE_IPV6
        else if (family == AF_INET6) {
                len = sizeof(struct sockaddr_in6);
        }
#endif

        return len;
}
1412
1413 static struct socket_info *swrap_get_socket_info(int si_index)
1414 {
1415         return (struct socket_info *)(&(sockets[si_index].info));
1416 }
1417
1418 static int swrap_get_refcount(struct socket_info *si)
1419 {
1420         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1421         return sic->meta.refcount;
1422 }
1423
1424 static void swrap_inc_refcount(struct socket_info *si)
1425 {
1426         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1427
1428         sic->meta.refcount += 1;
1429 }
1430
1431 static void swrap_dec_refcount(struct socket_info *si)
1432 {
1433         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1434
1435         sic->meta.refcount -= 1;
1436 }
1437
1438 static int swrap_get_next_free(struct socket_info *si)
1439 {
1440         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1441
1442         return sic->meta.next_free;
1443 }
1444
1445 static void swrap_set_next_free(struct socket_info *si, int next_free)
1446 {
1447         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1448
1449         sic->meta.next_free = next_free;
1450 }
1451
1452 static int swrap_un_path(struct sockaddr_un *un,
1453                          const char *swrap_dir,
1454                          char type,
1455                          unsigned int iface,
1456                          unsigned int prt)
1457 {
1458         int ret;
1459
1460         ret = snprintf(un->sun_path,
1461                        sizeof(un->sun_path),
1462                        "%s/"SOCKET_FORMAT,
1463                        swrap_dir,
1464                        type,
1465                        iface,
1466                        prt);
1467         if ((size_t)ret >= sizeof(un->sun_path)) {
1468                 return ENAMETOOLONG;
1469         }
1470
1471         return 0;
1472 }
1473
/*
 * Format "<swrap_dir>/EINVAL" into un->sun_path.
 *
 * Returns 0 on success, or ENAMETOOLONG if the result does not fit
 * into un->sun_path.
 */
static int swrap_un_path_EINVAL(struct sockaddr_un *un,
                                const char *swrap_dir)
{
        const size_t capacity = sizeof(un->sun_path);
        int written;

        written = snprintf(un->sun_path, capacity, "%s/EINVAL", swrap_dir);

        return ((size_t)written >= capacity) ? ENAMETOOLONG : 0;
}
1490
1491 static bool swrap_dir_usable(const char *swrap_dir)
1492 {
1493         struct sockaddr_un un;
1494         int ret;
1495
1496         ret = swrap_un_path(&un, swrap_dir, SOCKET_TYPE_CHAR_TCP, 0, 0);
1497         if (ret == 0) {
1498                 return true;
1499         }
1500
1501         ret = swrap_un_path_EINVAL(&un, swrap_dir);
1502         if (ret == 0) {
1503                 return true;
1504         }
1505
1506         return false;
1507 }
1508
1509 static char *socket_wrapper_dir(void)
1510 {
1511         char *swrap_dir = NULL;
1512         char *s = getenv("SOCKET_WRAPPER_DIR");
1513         char *t;
1514         bool ok;
1515
1516         if (s == NULL || s[0] == '\0') {
1517                 SWRAP_LOG(SWRAP_LOG_WARN, "SOCKET_WRAPPER_DIR not set");
1518                 return NULL;
1519         }
1520
1521         swrap_dir = realpath(s, NULL);
1522         if (swrap_dir == NULL) {
1523                 SWRAP_LOG(SWRAP_LOG_ERROR,
1524                           "Unable to resolve socket_wrapper dir path: %s - %s",
1525                           s,
1526                           strerror(errno));
1527                 abort();
1528         }
1529
1530         ok = swrap_dir_usable(swrap_dir);
1531         if (ok) {
1532                 goto done;
1533         }
1534
1535         free(swrap_dir);
1536
1537         ok = swrap_dir_usable(s);
1538         if (!ok) {
1539                 SWRAP_LOG(SWRAP_LOG_ERROR, "SOCKET_WRAPPER_DIR is too long");
1540                 abort();
1541         }
1542
1543         t = getenv("SOCKET_WRAPPER_DIR_ALLOW_ORIG");
1544         if (t == NULL) {
1545                 SWRAP_LOG(SWRAP_LOG_ERROR,
1546                           "realpath(SOCKET_WRAPPER_DIR) too long and "
1547                           "SOCKET_WRAPPER_DIR_ALLOW_ORIG not set");
1548                 abort();
1549
1550         }
1551
1552         swrap_dir = strdup(s);
1553         if (swrap_dir == NULL) {
1554                 SWRAP_LOG(SWRAP_LOG_ERROR,
1555                           "Unable to duplicate socket_wrapper dir path");
1556                 abort();
1557         }
1558
1559         SWRAP_LOG(SWRAP_LOG_WARN,
1560                   "realpath(SOCKET_WRAPPER_DIR) too long, "
1561                   "using original SOCKET_WRAPPER_DIR\n");
1562
1563 done:
1564         SWRAP_LOG(SWRAP_LOG_TRACE, "socket_wrapper_dir: %s", swrap_dir);
1565         return swrap_dir;
1566 }
1567
1568 static unsigned int socket_wrapper_mtu(void)
1569 {
1570         static unsigned int max_mtu = 0;
1571         unsigned int tmp;
1572         const char *s;
1573         char *endp;
1574
1575         swrap_mutex_lock(&mtu_update_mutex);
1576
1577         if (max_mtu != 0) {
1578                 goto done;
1579         }
1580
1581         max_mtu = SOCKET_WRAPPER_MTU_DEFAULT;
1582
1583         s = getenv("SOCKET_WRAPPER_MTU");
1584         if (s == NULL) {
1585                 goto done;
1586         }
1587
1588         tmp = strtol(s, &endp, 10);
1589         if (s == endp) {
1590                 goto done;
1591         }
1592
1593         if (tmp < SOCKET_WRAPPER_MTU_MIN || tmp > SOCKET_WRAPPER_MTU_MAX) {
1594                 goto done;
1595         }
1596         max_mtu = tmp;
1597
1598 done:
1599         swrap_mutex_unlock(&mtu_update_mutex);
1600         return max_mtu;
1601 }
1602
/*
 * Initialize *m as a mutex of type PTHREAD_MUTEX_ERRORCHECK.
 *
 * m:    the mutex to initialize
 * name: label printed in the error log when one of the pthread calls fails
 *
 * Returns 0 on success, otherwise the non-zero result of the first pthread
 * call that failed.
 */
static int _socket_wrapper_init_mutex(pthread_mutex_t *m, const char *name)
{
        pthread_mutexattr_t ma;
        bool need_destroy = false;
        int ret = 0;

/*
 * Run cmd and store its result in ret; on a non-zero result log the
 * stringified command together with name and jump to the cleanup label.
 */
#define __CHECK(cmd) do { \
        ret = cmd; \
        if (ret != 0) { \
                SWRAP_LOG(SWRAP_LOG_ERROR, \
                          "%s: %s - failed %d", \
                          name, #cmd, ret); \
                goto done; \
        } \
} while(0)

        *m = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
        __CHECK(pthread_mutexattr_init(&ma));
        /* From here on the attribute object has to be destroyed again. */
        need_destroy = true;
        __CHECK(pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK));
        __CHECK(pthread_mutex_init(m, &ma));
done:
        if (need_destroy) {
                pthread_mutexattr_destroy(&ma);
        }
        return ret;
}
1630
1631 static size_t socket_wrapper_max_sockets(void)
1632 {
1633         const char *s;
1634         size_t tmp;
1635         char *endp;
1636
1637         if (socket_info_max != 0) {
1638                 return socket_info_max;
1639         }
1640
1641         socket_info_max = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1642
1643         s = getenv("SOCKET_WRAPPER_MAX_SOCKETS");
1644         if (s == NULL || s[0] == '\0') {
1645                 goto done;
1646         }
1647
1648         tmp = strtoul(s, &endp, 10);
1649         if (s == endp) {
1650                 goto done;
1651         }
1652         if (tmp == 0) {
1653                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1654                 SWRAP_LOG(SWRAP_LOG_ERROR,
1655                           "Invalid number of sockets specified, "
1656                           "using default (%zu)",
1657                           tmp);
1658         }
1659
1660         if (tmp > SOCKET_WRAPPER_MAX_SOCKETS_LIMIT) {
1661                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
1662                 SWRAP_LOG(SWRAP_LOG_ERROR,
1663                           "Invalid number of sockets specified, "
1664                           "using maximum (%zu).",
1665                           tmp);
1666         }
1667
1668         socket_info_max = tmp;
1669
1670 done:
1671         return socket_info_max;
1672 }
1673
1674 static void socket_wrapper_init_fds_idx(void)
1675 {
1676         int *tmp = NULL;
1677         size_t i;
1678
1679         if (socket_fds_idx != NULL) {
1680                 return;
1681         }
1682
1683         tmp = (int *)calloc(socket_fds_max, sizeof(int));
1684         if (tmp == NULL) {
1685                 SWRAP_LOG(SWRAP_LOG_ERROR,
1686                           "Failed to allocate socket fds index array: %s",
1687                           strerror(errno));
1688                 exit(-1);
1689         }
1690
1691         for (i = 0; i < socket_fds_max; i++) {
1692                 tmp[i] = -1;
1693         }
1694
1695         socket_fds_idx = tmp;
1696 }
1697
1698 static void socket_wrapper_init_sockets(void)
1699 {
1700         size_t max_sockets;
1701         size_t i;
1702         int ret = 0;
1703
1704         swrap_bind_symbol_all();
1705
1706         swrap_mutex_lock(&sockets_mutex);
1707
1708         if (sockets != NULL) {
1709                 swrap_mutex_unlock(&sockets_mutex);
1710                 return;
1711         }
1712
1713         SWRAP_LOG(SWRAP_LOG_DEBUG,
1714                   "SOCKET_WRAPPER_PACKAGE[%s] SOCKET_WRAPPER_VERSION[%s]",
1715                   SOCKET_WRAPPER_PACKAGE, SOCKET_WRAPPER_VERSION);
1716
1717         /*
1718          * Intialize the static cache early before
1719          * any thread is able to start.
1720          */
1721         (void)swrap_ipv4_net();
1722
1723         socket_wrapper_init_fds_idx();
1724
1725         /* Needs to be called inside the sockets_mutex lock here. */
1726         max_sockets = socket_wrapper_max_sockets();
1727
1728         sockets = (struct socket_info_container *)calloc(max_sockets,
1729                                         sizeof(struct socket_info_container));
1730
1731         if (sockets == NULL) {
1732                 SWRAP_LOG(SWRAP_LOG_ERROR,
1733                           "Failed to allocate sockets array: %s",
1734                           strerror(errno));
1735                 swrap_mutex_unlock(&sockets_mutex);
1736                 exit(-1);
1737         }
1738
1739         swrap_mutex_lock(&first_free_mutex);
1740         swrap_mutex_lock(&sockets_si_global);
1741
1742         first_free = 0;
1743
1744         for (i = 0; i < max_sockets; i++) {
1745                 swrap_set_next_free(&sockets[i].info, i+1);
1746         }
1747
1748         /* mark the end of the free list */
1749         swrap_set_next_free(&sockets[max_sockets-1].info, -1);
1750
1751         swrap_mutex_unlock(&sockets_si_global);
1752         swrap_mutex_unlock(&first_free_mutex);
1753         swrap_mutex_unlock(&sockets_mutex);
1754         if (ret != 0) {
1755                 exit(-1);
1756         }
1757 }
1758
/*
 * Tell whether socket wrapping is active.
 *
 * Wrapping is active when socket_wrapper_dir() yields a directory. In
 * that case the directory string is released again and the sockets array
 * is initialized before true is returned.
 */
bool socket_wrapper_enabled(void)
{
        char *dir = socket_wrapper_dir();

        if (dir != NULL) {
                SAFE_FREE(dir);

                socket_wrapper_init_sockets();

                return true;
        }

        return false;
}
1773
1774 static unsigned int socket_wrapper_default_iface(void)
1775 {
1776         const char *s = getenv("SOCKET_WRAPPER_DEFAULT_IFACE");
1777         if (s) {
1778                 unsigned int iface;
1779                 if (sscanf(s, "%u", &iface) == 1) {
1780                         if (iface >= 1 && iface <= MAX_WRAPPED_INTERFACES) {
1781                                 return iface;
1782                         }
1783                 }
1784         }
1785
1786         return 1;/* 127.0.0.1 */
1787 }
1788
1789 static void set_socket_info_index(int fd, int idx)
1790 {
1791         SWRAP_LOG(SWRAP_LOG_TRACE,
1792                   "fd=%d idx=%d",
1793                   fd, idx);
1794         socket_fds_idx[fd] = idx;
1795         /* This builtin issues a full memory barrier. */
1796         __sync_synchronize();
1797 }
1798
1799 static void reset_socket_info_index(int fd)
1800 {
1801         SWRAP_LOG(SWRAP_LOG_TRACE,
1802                   "fd=%d idx=%d",
1803                   fd, -1);
1804         set_socket_info_index(fd, -1);
1805 }
1806
1807 static int find_socket_info_index(int fd)
1808 {
1809         if (fd < 0) {
1810                 return -1;
1811         }
1812
1813         if (socket_fds_idx == NULL) {
1814                 return -1;
1815         }
1816
1817         if ((size_t)fd >= socket_fds_max) {
1818                 /*
1819                  * Do not add a log here as some applications do stupid things
1820                  * like:
1821                  *
1822                  *     for (fd = 0; fd <= getdtablesize(); fd++) {
1823                  *         close(fd)
1824                  *     };
1825                  *
1826                  * This would produce millions of lines of debug messages.
1827                  */
1828 #if 0
1829                 SWRAP_LOG(SWRAP_LOG_ERROR,
1830                           "Looking for a socket info for the fd %d is over the "
1831                           "max socket index limit of %zu.",
1832                           fd,
1833                           socket_fds_max);
1834 #endif
1835                 return -1;
1836         }
1837
1838         /* This builtin issues a full memory barrier. */
1839         __sync_synchronize();
1840         return socket_fds_idx[fd];
1841 }
1842
1843 static int swrap_add_socket_info(const struct socket_info *si_input)
1844 {
1845         struct socket_info *si = NULL;
1846         int si_index = -1;
1847
1848         if (si_input == NULL) {
1849                 errno = EINVAL;
1850                 return -1;
1851         }
1852
1853         swrap_mutex_lock(&first_free_mutex);
1854         if (first_free == -1) {
1855                 errno = ENFILE;
1856                 goto out;
1857         }
1858
1859         si_index = first_free;
1860         si = swrap_get_socket_info(si_index);
1861
1862         SWRAP_LOCK_SI(si);
1863
1864         first_free = swrap_get_next_free(si);
1865         *si = *si_input;
1866         swrap_inc_refcount(si);
1867
1868         SWRAP_UNLOCK_SI(si);
1869
1870 out:
1871         swrap_mutex_unlock(&first_free_mutex);
1872
1873         return si_index;
1874 }
1875
1876 static int swrap_create_socket(struct socket_info *si, int fd)
1877 {
1878         int idx;
1879
1880         if ((size_t)fd >= socket_fds_max) {
1881                 SWRAP_LOG(SWRAP_LOG_ERROR,
1882                           "The max socket index limit of %zu has been reached, "
1883                           "trying to add %d",
1884                           socket_fds_max,
1885                           fd);
1886                 errno = EMFILE;
1887                 return -1;
1888         }
1889
1890         idx = swrap_add_socket_info(si);
1891         if (idx == -1) {
1892                 return -1;
1893         }
1894
1895         set_socket_info_index(fd, idx);
1896
1897         return idx;
1898 }
1899
1900 static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, socklen_t *len)
1901 {
1902         unsigned int iface;
1903         unsigned int prt;
1904         const char *p;
1905         char type;
1906
1907         p = strrchr(un->sun_path, '/');
1908         if (p) p++; else p = un->sun_path;
1909
1910         if (sscanf(p, SOCKET_FORMAT, &type, &iface, &prt) != 3) {
1911                 SWRAP_LOG(SWRAP_LOG_ERROR, "sun_path[%s] p[%s]",
1912                           un->sun_path, p);
1913                 errno = EINVAL;
1914                 return -1;
1915         }
1916
1917         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1918                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1919                           type, iface, prt);
1920                 errno = EINVAL;
1921                 return -1;
1922         }
1923
1924         if (prt > 0xFFFF) {
1925                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1926                           type, iface, prt);
1927                 errno = EINVAL;
1928                 return -1;
1929         }
1930
1931         SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u",
1932                   type, iface, prt);
1933
1934         switch(type) {
1935         case SOCKET_TYPE_CHAR_TCP:
1936         case SOCKET_TYPE_CHAR_UDP: {
1937                 struct sockaddr_in *in2 = (struct sockaddr_in *)(void *)in;
1938
1939                 if ((*len) < sizeof(*in2)) {
1940                         SWRAP_LOG(SWRAP_LOG_ERROR,
1941                                   "V4: *len(%zu) < sizeof(*in2)=%zu",
1942                                   (size_t)*len, sizeof(*in2));
1943                         errno = EINVAL;
1944                         return -1;
1945                 }
1946
1947                 memset(in2, 0, sizeof(*in2));
1948                 in2->sin_family = AF_INET;
1949                 in2->sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
1950                 in2->sin_port = htons(prt);
1951
1952                 *len = sizeof(*in2);
1953                 break;
1954         }
1955 #ifdef HAVE_IPV6
1956         case SOCKET_TYPE_CHAR_TCP_V6:
1957         case SOCKET_TYPE_CHAR_UDP_V6: {
1958                 struct sockaddr_in6 *in2 = (struct sockaddr_in6 *)(void *)in;
1959
1960                 if ((*len) < sizeof(*in2)) {
1961                         SWRAP_LOG(SWRAP_LOG_ERROR,
1962                                   "V6: *len(%zu) < sizeof(*in2)=%zu",
1963                                   (size_t)*len, sizeof(*in2));
1964                         SWRAP_LOG(SWRAP_LOG_ERROR, "LINE:%d", __LINE__);
1965                         errno = EINVAL;
1966                         return -1;
1967                 }
1968
1969                 memset(in2, 0, sizeof(*in2));
1970                 in2->sin6_family = AF_INET6;
1971                 in2->sin6_addr = *swrap_ipv6();
1972                 in2->sin6_addr.s6_addr[15] = iface;
1973                 in2->sin6_port = htons(prt);
1974
1975                 *len = sizeof(*in2);
1976                 break;
1977         }
1978 #endif
1979         default:
1980                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1981                           type, iface, prt);
1982                 errno = EINVAL;
1983                 return -1;
1984         }
1985
1986         return 0;
1987 }
1988
1989 static int convert_in_un_remote(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
1990                                 int *bcast)
1991 {
1992         char type = '\0';
1993         unsigned int prt;
1994         unsigned int iface;
1995         int is_bcast = 0;
1996         char *swrap_dir = NULL;
1997
1998         if (bcast) *bcast = 0;
1999
2000         switch (inaddr->sa_family) {
2001         case AF_INET: {
2002                 const struct sockaddr_in *in =
2003                     (const struct sockaddr_in *)(const void *)inaddr;
2004                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2005                 char u_type = '\0';
2006                 char b_type = '\0';
2007                 char a_type = '\0';
2008                 const unsigned int sw_net_addr = swrap_ipv4_net();
2009                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2010
2011                 switch (si->type) {
2012                 case SOCK_STREAM:
2013                         u_type = SOCKET_TYPE_CHAR_TCP;
2014                         break;
2015                 case SOCK_DGRAM:
2016                         u_type = SOCKET_TYPE_CHAR_UDP;
2017                         a_type = SOCKET_TYPE_CHAR_UDP;
2018                         b_type = SOCKET_TYPE_CHAR_UDP;
2019                         break;
2020                 default:
2021                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2022                         errno = ESOCKTNOSUPPORT;
2023                         return -1;
2024                 }
2025
2026                 prt = ntohs(in->sin_port);
2027                 if (a_type && addr == 0xFFFFFFFF) {
2028                         /* 255.255.255.255 only udp */
2029                         is_bcast = 2;
2030                         type = a_type;
2031                         iface = socket_wrapper_default_iface();
2032                 } else if (b_type && addr == sw_bcast_addr) {
2033                         /*
2034                          * 127.255.255.255
2035                          * or
2036                          * 10.255.255.255
2037                          * only udp
2038                          */
2039                         is_bcast = 1;
2040                         type = b_type;
2041                         iface = socket_wrapper_default_iface();
2042                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2043                         /* 127.0.0.X or 10.53.57.X */
2044                         is_bcast = 0;
2045                         type = u_type;
2046                         iface = (addr & 0x000000FF);
2047                 } else {
2048                         char str[256] = {0,};
2049                         inet_ntop(inaddr->sa_family,
2050                                   &in->sin_addr,
2051                                   str, sizeof(str));
2052                         SWRAP_LOG(SWRAP_LOG_WARN,
2053                                   "str[%s] prt[%u]",
2054                                   str, (unsigned)prt);
2055                         errno = ENETUNREACH;
2056                         return -1;
2057                 }
2058                 if (bcast) *bcast = is_bcast;
2059                 break;
2060         }
2061 #ifdef HAVE_IPV6
2062         case AF_INET6: {
2063                 const struct sockaddr_in6 *in =
2064                     (const struct sockaddr_in6 *)(const void *)inaddr;
2065                 struct in6_addr cmp1, cmp2;
2066
2067                 switch (si->type) {
2068                 case SOCK_STREAM:
2069                         type = SOCKET_TYPE_CHAR_TCP_V6;
2070                         break;
2071                 case SOCK_DGRAM:
2072                         type = SOCKET_TYPE_CHAR_UDP_V6;
2073                         break;
2074                 default:
2075                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2076                         errno = ESOCKTNOSUPPORT;
2077                         return -1;
2078                 }
2079
2080                 /* XXX no multicast/broadcast */
2081
2082                 prt = ntohs(in->sin6_port);
2083
2084                 cmp1 = *swrap_ipv6();
2085                 cmp2 = in->sin6_addr;
2086                 cmp2.s6_addr[15] = 0;
2087                 if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2088                         iface = in->sin6_addr.s6_addr[15];
2089                 } else {
2090                         char str[256] = {0,};
2091                         inet_ntop(inaddr->sa_family,
2092                                   &in->sin6_addr,
2093                                   str, sizeof(str));
2094                         SWRAP_LOG(SWRAP_LOG_WARN,
2095                                   "str[%s] prt[%u]",
2096                                   str, (unsigned)prt);
2097                         errno = ENETUNREACH;
2098                         return -1;
2099                 }
2100
2101                 break;
2102         }
2103 #endif
2104         default:
2105                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family!");
2106                 errno = ENETUNREACH;
2107                 return -1;
2108         }
2109
2110         if (prt == 0) {
2111                 SWRAP_LOG(SWRAP_LOG_WARN, "Port not set");
2112                 errno = EINVAL;
2113                 return -1;
2114         }
2115
2116         swrap_dir = socket_wrapper_dir();
2117         if (swrap_dir == NULL) {
2118                 errno = EINVAL;
2119                 return -1;
2120         }
2121
2122         if (is_bcast) {
2123                 swrap_un_path_EINVAL(un, swrap_dir);
2124                 SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2125                 SAFE_FREE(swrap_dir);
2126                 /* the caller need to do more processing */
2127                 return 0;
2128         }
2129
2130         swrap_un_path(un, swrap_dir, type, iface, prt);
2131         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2132
2133         SAFE_FREE(swrap_dir);
2134
2135         return 0;
2136 }
2137
2138 static int convert_in_un_alloc(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
2139                                int *bcast)
2140 {
2141         char type = '\0';
2142         unsigned int prt;
2143         unsigned int iface;
2144         struct stat st;
2145         int is_bcast = 0;
2146         char *swrap_dir = NULL;
2147
2148         if (bcast) *bcast = 0;
2149
2150         switch (si->family) {
2151         case AF_INET: {
2152                 const struct sockaddr_in *in =
2153                     (const struct sockaddr_in *)(const void *)inaddr;
2154                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2155                 char u_type = '\0';
2156                 char d_type = '\0';
2157                 char b_type = '\0';
2158                 char a_type = '\0';
2159                 const unsigned int sw_net_addr = swrap_ipv4_net();
2160                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2161
2162                 prt = ntohs(in->sin_port);
2163
2164                 switch (si->type) {
2165                 case SOCK_STREAM:
2166                         u_type = SOCKET_TYPE_CHAR_TCP;
2167                         d_type = SOCKET_TYPE_CHAR_TCP;
2168                         break;
2169                 case SOCK_DGRAM:
2170                         u_type = SOCKET_TYPE_CHAR_UDP;
2171                         d_type = SOCKET_TYPE_CHAR_UDP;
2172                         a_type = SOCKET_TYPE_CHAR_UDP;
2173                         b_type = SOCKET_TYPE_CHAR_UDP;
2174                         break;
2175                 default:
2176                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2177                         errno = ESOCKTNOSUPPORT;
2178                         return -1;
2179                 }
2180
2181                 if (addr == 0) {
2182                         /* 0.0.0.0 */
2183                         is_bcast = 0;
2184                         type = d_type;
2185                         iface = socket_wrapper_default_iface();
2186                 } else if (a_type && addr == 0xFFFFFFFF) {
2187                         /* 255.255.255.255 only udp */
2188                         is_bcast = 2;
2189                         type = a_type;
2190                         iface = socket_wrapper_default_iface();
2191                 } else if (b_type && addr == sw_bcast_addr) {
2192                         /* 127.255.255.255 only udp */
2193                         is_bcast = 1;
2194                         type = b_type;
2195                         iface = socket_wrapper_default_iface();
2196                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2197                         /* 127.0.0.X */
2198                         is_bcast = 0;
2199                         type = u_type;
2200                         iface = (addr & 0x000000FF);
2201                 } else {
2202                         errno = EADDRNOTAVAIL;
2203                         return -1;
2204                 }
2205
2206                 /* Store the bind address for connect() */
2207                 if (si->bindname.sa_socklen == 0) {
2208                         struct sockaddr_in bind_in;
2209                         socklen_t blen = sizeof(struct sockaddr_in);
2210
2211                         ZERO_STRUCT(bind_in);
2212                         bind_in.sin_family = in->sin_family;
2213                         bind_in.sin_port = in->sin_port;
2214                         bind_in.sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
2215                         si->bindname.sa_socklen = blen;
2216                         memcpy(&si->bindname.sa.in, &bind_in, blen);
2217                 }
2218
2219                 break;
2220         }
2221 #ifdef HAVE_IPV6
2222         case AF_INET6: {
2223                 const struct sockaddr_in6 *in =
2224                     (const struct sockaddr_in6 *)(const void *)inaddr;
2225                 struct in6_addr cmp1, cmp2;
2226
2227                 switch (si->type) {
2228                 case SOCK_STREAM:
2229                         type = SOCKET_TYPE_CHAR_TCP_V6;
2230                         break;
2231                 case SOCK_DGRAM:
2232                         type = SOCKET_TYPE_CHAR_UDP_V6;
2233                         break;
2234                 default:
2235                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2236                         errno = ESOCKTNOSUPPORT;
2237                         return -1;
2238                 }
2239
2240                 /* XXX no multicast/broadcast */
2241
2242                 prt = ntohs(in->sin6_port);
2243
2244                 cmp1 = *swrap_ipv6();
2245                 cmp2 = in->sin6_addr;
2246                 cmp2.s6_addr[15] = 0;
2247                 if (IN6_IS_ADDR_UNSPECIFIED(&in->sin6_addr)) {
2248                         iface = socket_wrapper_default_iface();
2249                 } else if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2250                         iface = in->sin6_addr.s6_addr[15];
2251                 } else {
2252                         errno = EADDRNOTAVAIL;
2253                         return -1;
2254                 }
2255
2256                 /* Store the bind address for connect() */
2257                 if (si->bindname.sa_socklen == 0) {
2258                         struct sockaddr_in6 bind_in;
2259                         socklen_t blen = sizeof(struct sockaddr_in6);
2260
2261                         ZERO_STRUCT(bind_in);
2262                         bind_in.sin6_family = in->sin6_family;
2263                         bind_in.sin6_port = in->sin6_port;
2264
2265                         bind_in.sin6_addr = *swrap_ipv6();
2266                         bind_in.sin6_addr.s6_addr[15] = iface;
2267
2268                         memcpy(&si->bindname.sa.in6, &bind_in, blen);
2269                         si->bindname.sa_socklen = blen;
2270                 }
2271
2272                 break;
2273         }
2274 #endif
2275         default:
2276                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2277                 errno = EADDRNOTAVAIL;
2278                 return -1;
2279         }
2280
2281
2282         if (bcast) *bcast = is_bcast;
2283
2284         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
2285                 errno = EINVAL;
2286                 return -1;
2287         }
2288
2289         swrap_dir = socket_wrapper_dir();
2290         if (swrap_dir == NULL) {
2291                 errno = EINVAL;
2292                 return -1;
2293         }
2294
2295         if (prt == 0) {
2296                 /* handle auto-allocation of ephemeral ports */
2297                 for (prt = 5001; prt < 10000; prt++) {
2298                         swrap_un_path(un, swrap_dir, type, iface, prt);
2299                         if (stat(un->sun_path, &st) == 0) continue;
2300
2301                         set_port(si->family, prt, &si->myname);
2302                         set_port(si->family, prt, &si->bindname);
2303
2304                         break;
2305                 }
2306
2307                 if (prt == 10000) {
2308                         errno = ENFILE;
2309                         SAFE_FREE(swrap_dir);
2310                         return -1;
2311                 }
2312         }
2313
2314         swrap_un_path(un, swrap_dir, type, iface, prt);
2315         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2316
2317         SAFE_FREE(swrap_dir);
2318
2319         return 0;
2320 }
2321
/*
 * Look up the socket_info entry belonging to file descriptor @fd.
 *
 * Returns NULL when find_socket_info_index() reports -1 for @fd,
 * otherwise whatever swrap_get_socket_info() yields for the index.
 */
static struct socket_info *find_socket_info(int fd)
{
	const int si_index = find_socket_info_index(fd);

	return (si_index == -1) ? NULL : swrap_get_socket_info(si_index);
}
2332
#if 0 /* FIXME */
/*
 * Disabled (compiled out) helper: reports whether some tracked socket
 * already has a local name with the same family, port and address as @sa.
 *
 * Returns false for unsupported families and for @len shorter than the
 * family's sockaddr structure. For AF_INET, entries whose own address is
 * INADDR_ANY are never treated as a match.
 */
static bool check_addr_port_in_use(const struct sockaddr *sa, socklen_t len)
{
	const struct socket_info *prev = NULL;
	struct socket_info_fd *node;
	size_t min_len;

	/* first catch invalid input */
	switch (sa->sa_family) {
	case AF_INET:
		min_len = sizeof(struct sockaddr_in);
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		min_len = sizeof(struct sockaddr_in6);
		break;
#endif
	default:
		return false;
	}
	if (len < min_len) {
		return false;
	}

	for (node = socket_fds; node != NULL; node = node->next) {
		struct socket_info *cur = swrap_get_socket_info(node->si_index);

		/* skip an entry identical to the one just examined */
		if (cur == prev) {
			continue;
		}
		prev = cur;

		if (cur->myname == NULL ||
		    cur->myname->sa_family != sa->sa_family) {
			continue;
		}

		switch (cur->myname->sa_family) {
		case AF_INET: {
			const struct sockaddr_in *have =
				(const struct sockaddr_in *)cur->myname;
			const struct sockaddr_in *want =
				(const struct sockaddr_in *)sa;

			if (have->sin_addr.s_addr != htonl(INADDR_ANY) &&
			    have->sin_port == want->sin_port &&
			    have->sin_addr.s_addr == want->sin_addr.s_addr) {
				/* found */
				return true;
			}
			break;
		}
#ifdef HAVE_IPV6
		case AF_INET6: {
			const struct sockaddr_in6 *have =
				(const struct sockaddr_in6 *)cur->myname;
			const struct sockaddr_in6 *want =
				(const struct sockaddr_in6 *)sa;

			if (have->sin6_port == want->sin6_port &&
			    IN6_ARE_ADDR_EQUAL(&have->sin6_addr,
					       &want->sin6_addr)) {
				/* found */
				return true;
			}
			break;
		}
#endif
		default:
			break;
		}
	}

	return false;
}
#endif
2424
2425 static void swrap_remove_stale(int fd);
2426
2427 static int sockaddr_convert_to_un(struct socket_info *si,
2428                                   const struct sockaddr *in_addr,
2429                                   socklen_t in_len,
2430                                   struct sockaddr_un *out_addr,
2431                                   int alloc_sock,
2432                                   int *bcast)
2433 {
2434         struct sockaddr *out = (struct sockaddr *)(void *)out_addr;
2435
2436         (void) in_len; /* unused */
2437
2438         if (out_addr == NULL) {
2439                 return 0;
2440         }
2441
2442         out->sa_family = AF_UNIX;
2443 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2444         out->sa_len = sizeof(*out_addr);
2445 #endif
2446
2447         switch (in_addr->sa_family) {
2448         case AF_UNSPEC: {
2449                 const struct sockaddr_in *sin;
2450                 if (si->family != AF_INET) {
2451                         break;
2452                 }
2453                 if (in_len < sizeof(struct sockaddr_in)) {
2454                         break;
2455                 }
2456                 sin = (const struct sockaddr_in *)(const void *)in_addr;
2457                 if(sin->sin_addr.s_addr != htonl(INADDR_ANY)) {
2458                         break;
2459                 }
2460
2461                 /*
2462                  * Note: in the special case of AF_UNSPEC and INADDR_ANY,
2463                  * AF_UNSPEC is mapped to AF_INET and must be treated here.
2464                  */
2465
2466                 FALL_THROUGH;
2467         }
2468         case AF_INET:
2469 #ifdef HAVE_IPV6
2470         case AF_INET6:
2471 #endif
2472                 switch (si->type) {
2473                 case SOCK_STREAM:
2474                 case SOCK_DGRAM:
2475                         break;
2476                 default:
2477                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2478                         errno = ESOCKTNOSUPPORT;
2479                         return -1;
2480                 }
2481                 if (alloc_sock) {
2482                         return convert_in_un_alloc(si, in_addr, out_addr, bcast);
2483                 } else {
2484                         return convert_in_un_remote(si, in_addr, out_addr, bcast);
2485                 }
2486         default:
2487                 break;
2488         }
2489
2490         errno = EAFNOSUPPORT;
2491         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2492         return -1;
2493 }
2494
2495 static int sockaddr_convert_from_un(const struct socket_info *si,
2496                                     const struct sockaddr_un *in_addr,
2497                                     socklen_t un_addrlen,
2498                                     int family,
2499                                     struct sockaddr *out_addr,
2500                                     socklen_t *out_addrlen)
2501 {
2502         int ret;
2503
2504         if (out_addr == NULL || out_addrlen == NULL)
2505                 return 0;
2506
2507         if (un_addrlen == 0) {
2508                 *out_addrlen = 0;
2509                 return 0;
2510         }
2511
2512         switch (family) {
2513         case AF_INET:
2514 #ifdef HAVE_IPV6
2515         case AF_INET6:
2516 #endif
2517                 switch (si->type) {
2518                 case SOCK_STREAM:
2519                 case SOCK_DGRAM:
2520                         break;
2521                 default:
2522                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2523                         errno = ESOCKTNOSUPPORT;
2524                         return -1;
2525                 }
2526                 ret = convert_un_in(in_addr, out_addr, out_addrlen);
2527 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2528                 out_addr->sa_len = *out_addrlen;
2529 #endif
2530                 return ret;
2531         default:
2532                 break;
2533         }
2534
2535         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2536         errno = EAFNOSUPPORT;
2537         return -1;
2538 }
2539
/*
 * Kinds of socket events distinguished by the wrapper.
 * NOTE(review): presumably consumed by the pcap capture code; the users of
 * this enum are outside this part of the file - confirm there.
 */
enum swrap_packet_type {
	/* connect() related */
	SWRAP_CONNECT_SEND = 0,
	SWRAP_CONNECT_UNREACH,
	SWRAP_CONNECT_RECV,
	SWRAP_CONNECT_ACK,

	/* accept() related */
	SWRAP_ACCEPT_SEND,
	SWRAP_ACCEPT_RECV,
	SWRAP_ACCEPT_ACK,

	/* recvfrom()/sendto() related */
	SWRAP_RECVFROM,
	SWRAP_SENDTO,
	SWRAP_SENDTO_UNREACH,

	/* recv()/send() related, including resets */
	SWRAP_PENDING_RST,
	SWRAP_RECV,
	SWRAP_RECV_RST,
	SWRAP_SEND,
	SWRAP_SEND_RST,

	/* close() related */
	SWRAP_CLOSE_SEND,
	SWRAP_CLOSE_RECV,
	SWRAP_CLOSE_ACK,
};
2560
/*
 * Header of the capture file.
 * NOTE(review): the field set looks like the classic pcap savefile global
 * header - confirm against the code that writes it.
 *
 * swrap_pcap_init_file() disables PCAP support unless the compiler lays
 * this struct out in exactly SWRAP_FILE_HDR_SIZE bytes.
 */
struct swrap_file_hdr {
	uint32_t	magic;
	uint16_t	version_major;
	uint16_t	version_minor;
	int32_t		timezone;
	uint32_t	sigfigs;
	uint32_t	frame_max_len;
	uint32_t	link_type;
};
#define SWRAP_FRAME_LENGTH_MAX 0xFFFF
#define SWRAP_FILE_HDR_SIZE 24
2572
/*
 * Per-packet record header placed in front of the IP header of every
 * captured packet. swrap_pcap_packet_init() fills the timestamp from a
 * struct timeval and stores the (possibly ICMP-truncated) on-wire length
 * in both length fields.
 *
 * Must occupy exactly SWRAP_PACKET_FRAME_SIZE bytes, otherwise
 * swrap_pcap_init_file() disables PCAP support.
 */
struct swrap_packet_frame {
	uint32_t	seconds;
	uint32_t	micro_seconds;
	uint32_t	recorded_length;
	uint32_t	full_length;
};
#define SWRAP_PACKET_FRAME_SIZE 16
2580
/*
 * IP header of a captured packet: either the IPv4 (v4) or the IPv6 (v6)
 * layout. The union is as large as the bigger (IPv6) variant.
 *
 * The SWRAP_PACKET_IP*_SIZE constants are the sizes the layouts must have;
 * swrap_pcap_init_file() disables PCAP support if the compiler disagrees.
 */
union swrap_packet_ip {
	/* IPv4 header without options */
	struct {
		uint8_t		ver_hdrlen;
		uint8_t		tos;
		uint16_t	packet_length;
		uint16_t	identification;
		uint8_t		flags;
		uint8_t		fragment;
		uint8_t		ttl;
		uint8_t		protocol;
		uint16_t	hdr_checksum;
		uint32_t	src_addr;
		uint32_t	dest_addr;
	} v4;
	/* IPv6 fixed header */
	struct {
		uint8_t		ver_prio;
		uint8_t		flow_label_high;
		uint16_t	flow_label_low;
		uint16_t	payload_length;
		uint8_t		next_header;
		uint8_t		hop_limit;
		uint8_t		src_addr[16];
		uint8_t		dest_addr[16];
	} v6;
};
#define SWRAP_PACKET_IP_V4_SIZE 20
#define SWRAP_PACKET_IP_V6_SIZE 40
#define SWRAP_PACKET_IP_SIZE 40
2609
/*
 * Transport-level header that follows the IP header of a captured packet:
 * TCP, UDP, ICMPv4 or ICMPv6. The union is as large as the biggest (TCP)
 * variant.
 *
 * The SWRAP_PACKET_PAYLOAD*_SIZE constants are the sizes the layouts must
 * have; swrap_pcap_init_file() disables PCAP support if the compiler
 * disagrees.
 */
union swrap_packet_payload {
	/* TCP header without options */
	struct {
		uint16_t	source_port;
		uint16_t	dest_port;
		uint32_t	seq_num;
		uint32_t	ack_num;
		uint8_t		hdr_length;
		uint8_t		control;
		uint16_t	window;
		uint16_t	checksum;
		uint16_t	urg;
	} tcp;
	/* UDP header */
	struct {
		uint16_t	source_port;
		uint16_t	dest_port;
		uint16_t	length;
		uint16_t	checksum;
	} udp;
	/* ICMPv4 header */
	struct {
		uint8_t		type;
		uint8_t		code;
		uint16_t	checksum;
		uint32_t	unused;
	} icmp4;
	/* ICMPv6 header */
	struct {
		uint8_t		type;
		uint8_t		code;
		uint16_t	checksum;
		uint32_t	unused;
	} icmp6;
};
#define SWRAP_PACKET_PAYLOAD_TCP_SIZE 20
#define SWRAP_PACKET_PAYLOAD_UDP_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP4_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP6_SIZE 8
#define SWRAP_PACKET_PAYLOAD_SIZE 20
2646
/*
 * Lower bound for the buffer swrap_pcap_packet_init() allocates: room for
 * the frame header plus the largest IP and payload header variants.
 */
#define SWRAP_PACKET_MIN_ALLOC \
	(SWRAP_PACKET_FRAME_SIZE + SWRAP_PACKET_IP_SIZE + SWRAP_PACKET_PAYLOAD_SIZE)
2651
2652 static const char *swrap_pcap_init_file(void)
2653 {
2654         static int initialized = 0;
2655         static const char *s = NULL;
2656         static const struct swrap_file_hdr h;
2657         static const struct swrap_packet_frame f;
2658         static const union swrap_packet_ip i;
2659         static const union swrap_packet_payload p;
2660
2661         if (initialized == 1) {
2662                 return s;
2663         }
2664         initialized = 1;
2665
2666         /*
2667          * TODO: don't use the structs use plain buffer offsets
2668          *       and PUSH_U8(), PUSH_U16() and PUSH_U32()
2669          *
2670          * for now make sure we disable PCAP support
2671          * if the struct has alignment!
2672          */
2673         if (sizeof(h) != SWRAP_FILE_HDR_SIZE) {
2674                 return NULL;
2675         }
2676         if (sizeof(f) != SWRAP_PACKET_FRAME_SIZE) {
2677                 return NULL;
2678         }
2679         if (sizeof(i) != SWRAP_PACKET_IP_SIZE) {
2680                 return NULL;
2681         }
2682         if (sizeof(i.v4) != SWRAP_PACKET_IP_V4_SIZE) {
2683                 return NULL;
2684         }
2685         if (sizeof(i.v6) != SWRAP_PACKET_IP_V6_SIZE) {
2686                 return NULL;
2687         }
2688         if (sizeof(p) != SWRAP_PACKET_PAYLOAD_SIZE) {
2689                 return NULL;
2690         }
2691         if (sizeof(p.tcp) != SWRAP_PACKET_PAYLOAD_TCP_SIZE) {
2692                 return NULL;
2693         }
2694         if (sizeof(p.udp) != SWRAP_PACKET_PAYLOAD_UDP_SIZE) {
2695                 return NULL;
2696         }
2697         if (sizeof(p.icmp4) != SWRAP_PACKET_PAYLOAD_ICMP4_SIZE) {
2698                 return NULL;
2699         }
2700         if (sizeof(p.icmp6) != SWRAP_PACKET_PAYLOAD_ICMP6_SIZE) {
2701                 return NULL;
2702         }
2703
2704         s = getenv("SOCKET_WRAPPER_PCAP_FILE");
2705         if (s == NULL) {
2706                 return NULL;
2707         }
2708         if (strncmp(s, "./", 2) == 0) {
2709                 s += 2;
2710         }
2711         SWRAP_LOG(SWRAP_LOG_TRACE, "SOCKET_WRAPPER_PCAP_FILE: %s", s);
2712         return s;
2713 }
2714
2715 static uint8_t *swrap_pcap_packet_init(struct timeval *tval,
2716                                        const struct sockaddr *src,
2717                                        const struct sockaddr *dest,
2718                                        int socket_type,
2719                                        const uint8_t *payload,
2720                                        size_t payload_len,
2721                                        unsigned long tcp_seqno,
2722                                        unsigned long tcp_ack,
2723                                        unsigned char tcp_ctl,
2724                                        int unreachable,
2725                                        size_t *_packet_len)
2726 {
2727         uint8_t *base = NULL;
2728         uint8_t *buf = NULL;
2729         union {
2730                 uint8_t *ptr;
2731                 struct swrap_packet_frame *frame;
2732         } f;
2733         union {
2734                 uint8_t *ptr;
2735                 union swrap_packet_ip *ip;
2736         } i;
2737         union swrap_packet_payload *pay;
2738         size_t packet_len;
2739         size_t alloc_len;
2740         size_t nonwire_len = sizeof(struct swrap_packet_frame);
2741         size_t wire_hdr_len = 0;
2742         size_t wire_len = 0;
2743         size_t ip_hdr_len = 0;
2744         size_t icmp_hdr_len = 0;
2745         size_t icmp_truncate_len = 0;
2746         uint8_t protocol = 0, icmp_protocol = 0;
2747         const struct sockaddr_in *src_in = NULL;
2748         const struct sockaddr_in *dest_in = NULL;
2749 #ifdef HAVE_IPV6
2750         const struct sockaddr_in6 *src_in6 = NULL;
2751         const struct sockaddr_in6 *dest_in6 = NULL;
2752 #endif
2753         uint16_t src_port;
2754         uint16_t dest_port;
2755
2756         switch (src->sa_family) {
2757         case AF_INET:
2758                 src_in = (const struct sockaddr_in *)(const void *)src;
2759                 dest_in = (const struct sockaddr_in *)(const void *)dest;
2760                 src_port = src_in->sin_port;
2761                 dest_port = dest_in->sin_port;
2762                 ip_hdr_len = sizeof(i.ip->v4);
2763                 break;
2764 #ifdef HAVE_IPV6
2765         case AF_INET6:
2766                 src_in6 = (const struct sockaddr_in6 *)(const void *)src;
2767                 dest_in6 = (const struct sockaddr_in6 *)(const void *)dest;
2768                 src_port = src_in6->sin6_port;
2769                 dest_port = dest_in6->sin6_port;
2770                 ip_hdr_len = sizeof(i.ip->v6);
2771                 break;
2772 #endif
2773         default:
2774                 return NULL;
2775         }
2776
2777         switch (socket_type) {
2778         case SOCK_STREAM:
2779                 protocol = 0x06; /* TCP */
2780                 wire_hdr_len = ip_hdr_len + sizeof(pay->tcp);
2781                 wire_len = wire_hdr_len + payload_len;
2782                 break;
2783
2784         case SOCK_DGRAM:
2785                 protocol = 0x11; /* UDP */
2786                 wire_hdr_len = ip_hdr_len + sizeof(pay->udp);
2787                 wire_len = wire_hdr_len + payload_len;
2788                 break;
2789
2790         default:
2791                 return NULL;
2792         }
2793
2794         if (unreachable) {
2795                 icmp_protocol = protocol;
2796                 switch (src->sa_family) {
2797                 case AF_INET:
2798                         protocol = 0x01; /* ICMPv4 */
2799                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp4);
2800                         break;
2801 #ifdef HAVE_IPV6
2802                 case AF_INET6:
2803                         protocol = 0x3A; /* ICMPv6 */
2804                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp6);
2805                         break;
2806 #endif
2807                 }
2808                 if (wire_len > 64 ) {
2809                         icmp_truncate_len = wire_len - 64;
2810                 }
2811                 wire_len += icmp_hdr_len;
2812         }
2813
2814         packet_len = nonwire_len + wire_len;
2815         alloc_len = packet_len;
2816         if (alloc_len < SWRAP_PACKET_MIN_ALLOC) {
2817                 alloc_len = SWRAP_PACKET_MIN_ALLOC;
2818         }
2819
2820         base = (uint8_t *)calloc(1, alloc_len);
2821         if (base == NULL) {
2822                 return NULL;
2823         }
2824
2825         buf = base;
2826         f.ptr = buf;
2827
2828         f.frame->seconds                = tval->tv_sec;
2829         f.frame->micro_seconds  = tval->tv_usec;
2830         f.frame->recorded_length        = wire_len - icmp_truncate_len;
2831         f.frame->full_length    = wire_len - icmp_truncate_len;
2832
2833         buf += SWRAP_PACKET_FRAME_SIZE;
2834
2835         i.ptr = buf;
2836         switch (src->sa_family) {
2837         case AF_INET:
2838                 if (src_in == NULL || dest_in == NULL) {
2839                         SAFE_FREE(base);
2840                         return NULL;
2841                 }
2842
2843                 i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2844                 i.ip->v4.tos            = 0x00;
2845                 i.ip->v4.packet_length  = htons(wire_len - icmp_truncate_len);
2846                 i.ip->v4.identification = htons(0xFFFF);
2847                 i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2848                 i.ip->v4.fragment       = htons(0x0000);
2849                 i.ip->v4.ttl            = 0xFF;
2850                 i.ip->v4.protocol       = protocol;
2851                 i.ip->v4.hdr_checksum   = htons(0x0000);
2852                 i.ip->v4.src_addr       = src_in->sin_addr.s_addr;
2853                 i.ip->v4.dest_addr      = dest_in->sin_addr.s_addr;
2854                 buf += SWRAP_PACKET_IP_V4_SIZE;
2855                 break;
2856 #ifdef HAVE_IPV6
2857         case AF_INET6:
2858                 if (src_in6 == NULL || dest_in6 == NULL) {
2859                         SAFE_FREE(base);
2860                         return NULL;
2861                 }
2862
2863                 i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2864                 i.ip->v6.flow_label_high        = 0x00;
2865                 i.ip->v6.flow_label_low = 0x0000;
2866                 i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2867                 i.ip->v6.next_header    = protocol;
2868                 memcpy(i.ip->v6.src_addr, src_in6->sin6_addr.s6_addr, 16);
2869                 memcpy(i.ip->v6.dest_addr, dest_in6->sin6_addr.s6_addr, 16);
2870                 buf += SWRAP_PACKET_IP_V6_SIZE;
2871                 break;
2872 #endif
2873         }
2874
2875         if (unreachable) {
2876                 pay = (union swrap_packet_payload *)(void *)buf;
2877                 switch (src->sa_family) {
2878                 case AF_INET:
2879                         pay->icmp4.type         = 0x03; /* destination unreachable */
2880                         pay->icmp4.code         = 0x01; /* host unreachable */
2881                         pay->icmp4.checksum     = htons(0x0000);
2882                         pay->icmp4.unused       = htonl(0x00000000);
2883
2884                         buf += SWRAP_PACKET_PAYLOAD_ICMP4_SIZE;
2885
2886                         /* set the ip header in the ICMP payload */
2887                         i.ptr = buf;
2888                         i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2889                         i.ip->v4.tos            = 0x00;
2890                         i.ip->v4.packet_length  = htons(wire_len - icmp_hdr_len);
2891                         i.ip->v4.identification = htons(0xFFFF);
2892                         i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2893                         i.ip->v4.fragment       = htons(0x0000);
2894                         i.ip->v4.ttl            = 0xFF;
2895                         i.ip->v4.protocol       = icmp_protocol;
2896                         i.ip->v4.hdr_checksum   = htons(0x0000);
2897                         i.ip->v4.src_addr       = dest_in->sin_addr.s_addr;
2898                         i.ip->v4.dest_addr      = src_in->sin_addr.s_addr;
2899
2900                         buf += SWRAP_PACKET_IP_V4_SIZE;
2901
2902                         src_port = dest_in->sin_port;
2903                         dest_port = src_in->sin_port;
2904                         break;
2905 #ifdef HAVE_IPV6
2906                 case AF_INET6:
2907                         pay->icmp6.type         = 0x01; /* destination unreachable */
2908                         pay->icmp6.code         = 0x03; /* address unreachable */
2909                         pay->icmp6.checksum     = htons(0x0000);
2910                         pay->icmp6.unused       = htonl(0x00000000);
2911                         buf += SWRAP_PACKET_PAYLOAD_ICMP6_SIZE;
2912
2913                         /* set the ip header in the ICMP payload */
2914                         i.ptr = buf;
2915                         i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2916                         i.ip->v6.flow_label_high        = 0x00;
2917                         i.ip->v6.flow_label_low = 0x0000;
2918                         i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2919                         i.ip->v6.next_header    = protocol;
2920                         memcpy(i.ip->v6.src_addr, dest_in6->sin6_addr.s6_addr, 16);
2921                         memcpy(i.ip->v6.dest_addr, src_in6->sin6_addr.s6_addr, 16);
2922
2923                         buf += SWRAP_PACKET_IP_V6_SIZE;
2924
2925                         src_port = dest_in6->sin6_port;
2926                         dest_port = src_in6->sin6_port;
2927                         break;
2928 #endif
2929                 }
2930         }
2931
2932         pay = (union swrap_packet_payload *)(void *)buf;
2933
2934         switch (socket_type) {
2935         case SOCK_STREAM:
2936                 pay->tcp.source_port    = src_port;
2937                 pay->tcp.dest_port      = dest_port;
2938                 pay->tcp.seq_num        = htonl(tcp_seqno);
2939                 pay->tcp.ack_num        = htonl(tcp_ack);
2940                 pay->tcp.hdr_length     = 0x50; /* 5 * 32 bit words */
2941                 pay->tcp.control        = tcp_ctl;
2942                 pay->tcp.window         = htons(0x7FFF);
2943                 pay->tcp.checksum       = htons(0x0000);
2944                 pay->tcp.urg            = htons(0x0000);
2945                 buf += SWRAP_PACKET_PAYLOAD_TCP_SIZE;
2946
2947                 break;
2948
2949         case SOCK_DGRAM:
2950                 pay->udp.source_port    = src_port;
2951                 pay->udp.dest_port      = dest_port;
2952                 pay->udp.length         = htons(8 + payload_len);
2953                 pay->udp.checksum       = htons(0x0000);
2954                 buf += SWRAP_PACKET_PAYLOAD_UDP_SIZE;
2955
2956                 break;
2957         }
2958
2959         if (payload && payload_len > 0) {
2960                 memcpy(buf, payload, payload_len);
2961         }
2962
2963         *_packet_len = packet_len - icmp_truncate_len;
2964         return base;
2965 }
2966
2967 static int swrap_pcap_get_fd(const char *fname)
2968 {
2969         static int fd = -1;
2970
2971         if (fd != -1) {
2972                 return fd;
2973         }
2974
2975         fd = libc_open(fname, O_WRONLY|O_CREAT|O_EXCL|O_APPEND, 0644);
2976         if (fd != -1) {
2977                 struct swrap_file_hdr file_hdr;
2978                 file_hdr.magic          = 0xA1B2C3D4;
2979                 file_hdr.version_major  = 0x0002;
2980                 file_hdr.version_minor  = 0x0004;
2981                 file_hdr.timezone       = 0x00000000;
2982                 file_hdr.sigfigs        = 0x00000000;
2983                 file_hdr.frame_max_len  = SWRAP_FRAME_LENGTH_MAX;
2984                 file_hdr.link_type      = 0x0065; /* 101 RAW IP */
2985
2986                 if (libc_write(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
2987                         libc_close(fd);
2988                         fd = -1;
2989                 }
2990                 return fd;
2991         }
2992
2993         fd = libc_open(fname, O_WRONLY|O_APPEND, 0644);
2994
2995         return fd;
2996 }
2997
2998 static uint8_t *swrap_pcap_marshall_packet(struct socket_info *si,
2999                                            const struct sockaddr *addr,
3000                                            enum swrap_packet_type type,
3001                                            const void *buf, size_t len,
3002                                            size_t *packet_len)
3003 {
3004         const struct sockaddr *src_addr;
3005         const struct sockaddr *dest_addr;
3006         unsigned long tcp_seqno = 0;
3007         unsigned long tcp_ack = 0;
3008         unsigned char tcp_ctl = 0;
3009         int unreachable = 0;
3010
3011         struct timeval tv;
3012
3013         switch (si->family) {
3014         case AF_INET:
3015                 break;
3016 #ifdef HAVE_IPV6
3017         case AF_INET6:
3018                 break;
3019 #endif
3020         default:
3021                 return NULL;
3022         }
3023
3024         switch (type) {
3025         case SWRAP_CONNECT_SEND:
3026                 if (si->type != SOCK_STREAM) {
3027                         return NULL;
3028                 }
3029
3030                 src_addr  = &si->myname.sa.s;
3031                 dest_addr = addr;
3032
3033                 tcp_seqno = si->io.pck_snd;
3034                 tcp_ack = si->io.pck_rcv;
3035                 tcp_ctl = 0x02; /* SYN */
3036
3037                 si->io.pck_snd += 1;
3038
3039                 break;
3040
3041         case SWRAP_CONNECT_RECV:
3042                 if (si->type != SOCK_STREAM) {
3043                         return NULL;
3044                 }
3045
3046                 dest_addr = &si->myname.sa.s;
3047                 src_addr = addr;
3048
3049                 tcp_seqno = si->io.pck_rcv;
3050                 tcp_ack = si->io.pck_snd;
3051                 tcp_ctl = 0x12; /** SYN,ACK */
3052
3053                 si->io.pck_rcv += 1;
3054
3055                 break;
3056
3057         case SWRAP_CONNECT_UNREACH:
3058                 if (si->type != SOCK_STREAM) {
3059                         return NULL;
3060                 }
3061
3062                 dest_addr = &si->myname.sa.s;
3063                 src_addr  = addr;
3064
3065                 /* Unreachable: resend the data of SWRAP_CONNECT_SEND */
3066                 tcp_seqno = si->io.pck_snd - 1;
3067                 tcp_ack = si->io.pck_rcv;
3068                 tcp_ctl = 0x02; /* SYN */
3069                 unreachable = 1;
3070
3071                 break;
3072
3073         case SWRAP_CONNECT_ACK:
3074                 if (si->type != SOCK_STREAM) {
3075                         return NULL;
3076                 }
3077
3078                 src_addr  = &si->myname.sa.s;
3079                 dest_addr = addr;
3080
3081                 tcp_seqno = si->io.pck_snd;
3082                 tcp_ack = si->io.pck_rcv;
3083                 tcp_ctl = 0x10; /* ACK */
3084
3085                 break;
3086
3087         case SWRAP_ACCEPT_SEND:
3088                 if (si->type != SOCK_STREAM) {
3089                         return NULL;
3090                 }
3091
3092                 dest_addr = &si->myname.sa.s;
3093                 src_addr = addr;
3094
3095                 tcp_seqno = si->io.pck_rcv;
3096                 tcp_ack = si->io.pck_snd;
3097                 tcp_ctl = 0x02; /* SYN */
3098
3099                 si->io.pck_rcv += 1;
3100
3101                 break;
3102
3103         case SWRAP_ACCEPT_RECV:
3104                 if (si->type != SOCK_STREAM) {
3105                         return NULL;
3106                 }
3107
3108                 src_addr = &si->myname.sa.s;
3109                 dest_addr = addr;
3110
3111                 tcp_seqno = si->io.pck_snd;
3112                 tcp_ack = si->io.pck_rcv;
3113                 tcp_ctl = 0x12; /* SYN,ACK */
3114
3115                 si->io.pck_snd += 1;
3116
3117                 break;
3118
3119         case SWRAP_ACCEPT_ACK:
3120                 if (si->type != SOCK_STREAM) {
3121                         return NULL;
3122                 }
3123
3124                 dest_addr = &si->myname.sa.s;
3125                 src_addr = addr;
3126
3127                 tcp_seqno = si->io.pck_rcv;
3128                 tcp_ack = si->io.pck_snd;
3129                 tcp_ctl = 0x10; /* ACK */
3130
3131                 break;
3132
3133         case SWRAP_SEND:
3134                 src_addr  = &si->myname.sa.s;
3135                 dest_addr = &si->peername.sa.s;
3136
3137                 tcp_seqno = si->io.pck_snd;
3138                 tcp_ack = si->io.pck_rcv;
3139                 tcp_ctl = 0x18; /* PSH,ACK */
3140
3141                 si->io.pck_snd += len;
3142
3143                 break;
3144
3145         case SWRAP_SEND_RST:
3146                 dest_addr = &si->myname.sa.s;
3147                 src_addr  = &si->peername.sa.s;
3148
3149                 if (si->type == SOCK_DGRAM) {
3150                         return swrap_pcap_marshall_packet(si,
3151                                                           &si->peername.sa.s,
3152                                                           SWRAP_SENDTO_UNREACH,
3153                                                           buf,
3154                                                           len,
3155                                                           packet_len);
3156                 }
3157
3158                 tcp_seqno = si->io.pck_rcv;
3159                 tcp_ack = si->io.pck_snd;
3160                 tcp_ctl = 0x14; /** RST,ACK */
3161
3162                 break;
3163
3164         case SWRAP_PENDING_RST:
3165                 dest_addr = &si->myname.sa.s;
3166                 src_addr  = &si->peername.sa.s;
3167
3168                 if (si->type == SOCK_DGRAM) {
3169                         return NULL;
3170                 }
3171
3172                 tcp_seqno = si->io.pck_rcv;
3173                 tcp_ack = si->io.pck_snd;
3174                 tcp_ctl = 0x14; /* RST,ACK */
3175
3176                 break;
3177
3178         case SWRAP_RECV:
3179                 dest_addr = &si->myname.sa.s;
3180                 src_addr  = &si->peername.sa.s;
3181
3182                 tcp_seqno = si->io.pck_rcv;
3183                 tcp_ack = si->io.pck_snd;
3184                 tcp_ctl = 0x18; /* PSH,ACK */
3185
3186                 si->io.pck_rcv += len;
3187
3188                 break;
3189
3190         case SWRAP_RECV_RST:
3191                 dest_addr = &si->myname.sa.s;
3192                 src_addr  = &si->peername.sa.s;
3193
3194                 if (si->type == SOCK_DGRAM) {
3195                         return NULL;
3196                 }
3197
3198                 tcp_seqno = si->io.pck_rcv;
3199                 tcp_ack = si->io.pck_snd;
3200                 tcp_ctl = 0x14; /* RST,ACK */
3201
3202                 break;
3203
3204         case SWRAP_SENDTO:
3205                 src_addr = &si->myname.sa.s;
3206                 dest_addr = addr;
3207
3208                 si->io.pck_snd += len;
3209
3210                 break;
3211
3212         case SWRAP_SENDTO_UNREACH:
3213                 dest_addr = &si->myname.sa.s;
3214                 src_addr = addr;
3215
3216                 unreachable = 1;
3217
3218                 break;
3219
3220         case SWRAP_RECVFROM:
3221                 dest_addr = &si->myname.sa.s;
3222                 src_addr = addr;
3223
3224                 si->io.pck_rcv += len;
3225
3226                 break;
3227
3228         case SWRAP_CLOSE_SEND:
3229                 if (si->type != SOCK_STREAM) {
3230                         return NULL;
3231                 }
3232
3233                 src_addr  = &si->myname.sa.s;
3234                 dest_addr = &si->peername.sa.s;
3235
3236                 tcp_seqno = si->io.pck_snd;
3237                 tcp_ack = si->io.pck_rcv;
3238                 tcp_ctl = 0x11; /* FIN, ACK */
3239
3240                 si->io.pck_snd += 1;
3241
3242                 break;
3243
3244         case SWRAP_CLOSE_RECV:
3245                 if (si->type != SOCK_STREAM) {
3246                         return NULL;
3247                 }
3248
3249                 dest_addr = &si->myname.sa.s;
3250                 src_addr  = &si->peername.sa.s;
3251
3252                 tcp_seqno = si->io.pck_rcv;
3253                 tcp_ack = si->io.pck_snd;
3254                 tcp_ctl = 0x11; /* FIN,ACK */
3255
3256                 si->io.pck_rcv += 1;
3257
3258                 break;
3259
3260         case SWRAP_CLOSE_ACK:
3261                 if (si->type != SOCK_STREAM) {
3262                         return NULL;
3263                 }
3264
3265                 src_addr  = &si->myname.sa.s;
3266                 dest_addr = &si->peername.sa.s;
3267
3268                 tcp_seqno = si->io.pck_snd;
3269                 tcp_ack = si->io.pck_rcv;
3270                 tcp_ctl = 0x10; /* ACK */
3271
3272                 break;
3273         default:
3274                 return NULL;
3275         }
3276
3277         swrapGetTimeOfDay(&tv);
3278
3279         return swrap_pcap_packet_init(&tv,
3280                                       src_addr,
3281                                       dest_addr,
3282                                       si->type,
3283                                       (const uint8_t *)buf,
3284                                       len,
3285                                       tcp_seqno,
3286                                       tcp_ack,
3287                                       tcp_ctl,
3288                                       unreachable,
3289                                       packet_len);
3290 }
3291
3292 static void swrap_pcap_dump_packet(struct socket_info *si,
3293                                    const struct sockaddr *addr,
3294                                    enum swrap_packet_type type,
3295                                    const void *buf, size_t len)
3296 {
3297         const char *file_name;
3298         uint8_t *packet;
3299         size_t packet_len = 0;
3300         int fd;
3301
3302         swrap_mutex_lock(&pcap_dump_mutex);
3303
3304         file_name = swrap_pcap_init_file();
3305         if (!file_name) {
3306                 goto done;
3307         }
3308
3309         packet = swrap_pcap_marshall_packet(si,
3310                                             addr,
3311                                             type,
3312                                             buf,
3313                                             len,
3314                                             &packet_len);
3315         if (packet == NULL) {
3316                 goto done;
3317         }
3318
3319         fd = swrap_pcap_get_fd(file_name);
3320         if (fd != -1) {
3321                 if (libc_write(fd, packet, packet_len) != (ssize_t)packet_len) {
3322                         free(packet);
3323                         goto done;
3324                 }
3325         }
3326
3327         free(packet);
3328
3329 done:
3330         swrap_mutex_unlock(&pcap_dump_mutex);
3331 }
3332
3333 /****************************************************************************
3334  *   SIGNALFD
3335  ***************************************************************************/
3336
3337 #ifdef HAVE_SIGNALFD
3338 static int swrap_signalfd(int fd, const sigset_t *mask, int flags)
3339 {
3340         int rc;
3341
3342         rc = libc_signalfd(fd, mask, flags);
3343         if (rc != -1) {
3344                 swrap_remove_stale(fd);
3345         }
3346
3347         return rc;
3348 }
3349
3350 int signalfd(int fd, const sigset_t *mask, int flags)
3351 {
3352         return swrap_signalfd(fd, mask, flags);
3353 }
3354 #endif
3355
3356 /****************************************************************************
3357  *   SOCKET
3358  ***************************************************************************/
3359
3360 static int swrap_socket(int family, int type, int protocol)
3361 {
3362         struct socket_info *si = NULL;
3363         struct socket_info _si = { 0 };
3364         int fd;
3365         int ret;
3366         int real_type = type;
3367
3368         /*
3369          * Remove possible addition flags passed to socket() so
3370          * do not fail checking the type.
3371          * See https://lwn.net/Articles/281965/
3372          */
3373 #ifdef SOCK_CLOEXEC
3374         real_type &= ~SOCK_CLOEXEC;
3375 #endif
3376 #ifdef SOCK_NONBLOCK
3377         real_type &= ~SOCK_NONBLOCK;
3378 #endif
3379
3380         if (!socket_wrapper_enabled()) {
3381                 return libc_socket(family, type, protocol);
3382         }
3383
3384         switch (family) {
3385         case AF_INET:
3386 #ifdef HAVE_IPV6
3387         case AF_INET6:
3388 #endif
3389                 break;
3390 #ifdef AF_NETLINK
3391         case AF_NETLINK:
3392 #endif /* AF_NETLINK */
3393 #ifdef AF_PACKET
3394         case AF_PACKET:
3395 #endif /* AF_PACKET */
3396         case AF_UNIX:
3397                 fd = libc_socket(family, type, protocol);
3398                 if (fd != -1) {
3399                         /* Check if we have a stale fd and remove it */
3400                         swrap_remove_stale(fd);
3401                         SWRAP_LOG(SWRAP_LOG_TRACE,
3402                                   "Unix socket fd=%d",
3403                                   fd);
3404                 }
3405                 return fd;
3406         default:
3407                 errno = EAFNOSUPPORT;
3408                 return -1;
3409         }
3410
3411         switch (real_type) {
3412         case SOCK_STREAM:
3413                 break;
3414         case SOCK_DGRAM:
3415                 break;
3416         default:
3417                 errno = EPROTONOSUPPORT;
3418                 return -1;
3419         }
3420
3421         switch (protocol) {
3422         case 0:
3423                 break;
3424         case 6:
3425                 if (real_type == SOCK_STREAM) {
3426                         break;
3427                 }
3428                 FALL_THROUGH;
3429         case 17:
3430                 if (real_type == SOCK_DGRAM) {
3431                         break;
3432                 }
3433                 FALL_THROUGH;
3434         default:
3435                 errno = EPROTONOSUPPORT;
3436                 return -1;
3437         }
3438
3439         /*
3440          * We must call libc_socket with type, from the caller, not the version
3441          * we removed SOCK_CLOEXEC and SOCK_NONBLOCK from
3442          */
3443         fd = libc_socket(AF_UNIX, type, 0);
3444
3445         if (fd == -1) {
3446                 return -1;
3447         }
3448
3449         /* Check if we have a stale fd and remove it */
3450         swrap_remove_stale(fd);
3451
3452         si = &_si;
3453         si->family = family;
3454
3455         /* however, the rest of the socket_wrapper code expects just
3456          * the type, not the flags */
3457         si->type = real_type;
3458         si->protocol = protocol;
3459
3460         /*
3461          * Setup myname so getsockname() can succeed to find out the socket
3462          * type.
3463          */
3464         switch(si->family) {
3465         case AF_INET: {
3466                 struct sockaddr_in sin = {
3467                         .sin_family = AF_INET,
3468                 };
3469
3470                 si->myname.sa_socklen = sizeof(struct sockaddr_in);
3471                 memcpy(&si->myname.sa.in, &sin, si->myname.sa_socklen);
3472                 break;
3473         }
3474 #ifdef HAVE_IPV6
3475         case AF_INET6: {
3476                 struct sockaddr_in6 sin6 = {
3477                         .sin6_family = AF_INET6,
3478                 };
3479
3480                 si->myname.sa_socklen = sizeof(struct sockaddr_in6);
3481                 memcpy(&si->myname.sa.in6, &sin6, si->myname.sa_socklen);
3482                 break;
3483         }
3484 #endif
3485         default:
3486                 errno = EINVAL;
3487                 return -1;
3488         }
3489
3490         ret = swrap_create_socket(si, fd);
3491         if (ret == -1) {
3492                 int saved_errno = errno;
3493                 libc_close(fd);
3494                 errno = saved_errno;
3495                 return -1;
3496         }
3497
3498         SWRAP_LOG(SWRAP_LOG_TRACE,
3499                   "Created %s socket for protocol %s, fd=%d",
3500                   family == AF_INET ? "IPv4" : "IPv6",
3501                   real_type == SOCK_DGRAM ? "UDP" : "TCP",
3502                   fd);
3503
3504         return fd;
3505 }
3506
/* Exported socket() symbol, every call is handled by swrap_socket(). */
int socket(int family, int type, int protocol)
{
        int fd = swrap_socket(family, type, protocol);

        return fd;
}
3511
3512 /****************************************************************************
3513  *   SOCKETPAIR
3514  ***************************************************************************/
3515
/*
 * socketpair() replacement: lets libc create the pair and, on success,
 * removes stale socket_wrapper entries for both new descriptors.
 *
 * Returns the result of libc_socketpair() unchanged.
 */
static int swrap_socketpair(int family, int type, int protocol, int sv[2])
{
        int rc = libc_socketpair(family, type, protocol, sv);

        if (rc == -1) {
                return rc;
        }

        swrap_remove_stale(sv[0]);
        swrap_remove_stale(sv[1]);

        return rc;
}
3528
/* Exported socketpair() symbol, every call is handled by swrap_socketpair(). */
int socketpair(int family, int type, int protocol, int sv[2])
{
        int rc = swrap_socketpair(family, type, protocol, sv);

        return rc;
}
3533
3534 /****************************************************************************
 *   TIMERFD
3536  ***************************************************************************/
3537
3538 #ifdef HAVE_TIMERFD_CREATE
/*
 * timerfd_create() replacement: lets libc create the timer descriptor
 * and, on success, removes a stale socket_wrapper entry for it.
 *
 * Returns the result of libc_timerfd_create() unchanged.
 */
static int swrap_timerfd_create(int clockid, int flags)
{
        int fd = libc_timerfd_create(clockid, flags);

        if (fd == -1) {
                return fd;
        }

        swrap_remove_stale(fd);

        return fd;
}
3550
/* Exported timerfd_create() symbol, handled by swrap_timerfd_create(). */
int timerfd_create(int clockid, int flags)
{
        int fd = swrap_timerfd_create(clockid, flags);

        return fd;
}
3555 #endif
3556
3557 /****************************************************************************
3558  *   PIPE
3559  ***************************************************************************/
3560
/*
 * pipe() replacement: lets libc create the pipe and, on success,
 * removes stale socket_wrapper entries for both ends.
 *
 * Returns the result of libc_pipe() unchanged.
 */
static int swrap_pipe(int pipefd[2])
{
        int rc = libc_pipe(pipefd);

        if (rc == -1) {
                return rc;
        }

        swrap_remove_stale(pipefd[0]);
        swrap_remove_stale(pipefd[1]);

        return rc;
}
3573
/* Exported pipe() symbol, every call is handled by swrap_pipe(). */
int pipe(int pipefd[2])
{
        int rc = swrap_pipe(pipefd);

        return rc;
}
3578
3579 /****************************************************************************
3580  *   ACCEPT
3581  ***************************************************************************/
3582
3583 static int swrap_accept(int s,
3584                         struct sockaddr *addr,
3585                         socklen_t *addrlen,
3586                         int flags)
3587 {
3588         struct socket_info *parent_si, *child_si;
3589         struct socket_info new_si = { 0 };
3590         int fd;
3591         int idx;
3592         struct swrap_address un_addr = {
3593                 .sa_socklen = sizeof(struct sockaddr_un),
3594         };
3595         struct swrap_address un_my_addr = {
3596                 .sa_socklen = sizeof(struct sockaddr_un),
3597         };
3598         struct swrap_address in_addr = {
3599                 .sa_socklen = sizeof(struct sockaddr_storage),
3600         };
3601         struct swrap_address in_my_addr = {
3602                 .sa_socklen = sizeof(struct sockaddr_storage),
3603         };
3604         int ret;
3605
3606         parent_si = find_socket_info(s);
3607         if (!parent_si) {
3608 #ifdef HAVE_ACCEPT4
3609                 return libc_accept4(s, addr, addrlen, flags);
3610 #else
3611                 UNUSED(flags);
3612                 return libc_accept(s, addr, addrlen);
3613 #endif
3614         }
3615
3616
3617         /*
3618          * prevent parent_si from being altered / closed
3619          * while we read it
3620          */
3621         SWRAP_LOCK_SI(parent_si);
3622
3623         /*
3624          * assume out sockaddr have the same size as the in parent
3625          * socket family
3626          */
3627         in_addr.sa_socklen = socket_length(parent_si->family);
3628         if (in_addr.sa_socklen <= 0) {
3629                 SWRAP_UNLOCK_SI(parent_si);
3630                 errno = EINVAL;
3631                 return -1;
3632         }
3633
3634         SWRAP_UNLOCK_SI(parent_si);
3635
3636 #ifdef HAVE_ACCEPT4
3637         ret = libc_accept4(s, &un_addr.sa.s, &un_addr.sa_socklen, flags);
3638 #else
3639         UNUSED(flags);
3640         ret = libc_accept(s, &un_addr.sa.s, &un_addr.sa_socklen);
3641 #endif
3642         if (ret == -1) {
3643                 int saved_errno = errno;
3644                 if (saved_errno == ENOTSOCK) {
3645                         /* Remove stale fds */
3646                         swrap_remove_stale(s);
3647                 }
3648                 errno = saved_errno;
3649                 return ret;
3650         }
3651
3652         fd = ret;
3653
3654         /* Check if we have a stale fd and remove it */
3655         swrap_remove_stale(fd);
3656
3657         if (un_addr.sa.un.sun_path[0] == '\0') {
3658                 /*
3659                  * FreeBSD seems to have a problem where
3660                  * accept4() on the unix socket doesn't
3661                  * ECONNABORTED for already disconnected connections.
3662                  *
3663                  * Let's try libc_getpeername() to get the peer address
3664                  * as a fallback, but it'll likely return ENOTCONN,
3665                  * which we have to map to ECONNABORTED.
3666                  */
3667                 un_addr.sa_socklen = sizeof(struct sockaddr_un),
3668                 ret = libc_getpeername(fd, &un_addr.sa.s, &un_addr.sa_socklen);
3669                 if (ret == -1) {
3670                         int saved_errno = errno;
3671                         libc_close(fd);
3672                         if (saved_errno == ENOTCONN) {
3673                                 /*
3674                                  * If the connection is already disconnected
3675                                  * we should return ECONNABORTED.
3676                                  */
3677                                 saved_errno = ECONNABORTED;
3678                         }
3679                         errno = saved_errno;
3680                         return ret;
3681                 }
3682         }
3683
3684         ret = libc_getsockname(fd,
3685                                &un_my_addr.sa.s,
3686                                &un_my_addr.sa_socklen);
3687         if (ret == -1) {
3688                 int saved_errno = errno;
3689                 libc_close(fd);
3690                 if (saved_errno == ENOTCONN) {
3691                         /*
3692                          * If the connection is already disconnected
3693                          * we should return ECONNABORTED.
3694                          */
3695                         saved_errno = ECONNABORTED;
3696                 }
3697                 errno = saved_errno;
3698                 return ret;
3699         }
3700
3701         SWRAP_LOCK_SI(parent_si);
3702
3703         ret = sockaddr_convert_from_un(parent_si,
3704                                        &un_addr.sa.un,
3705                                        un_addr.sa_socklen,
3706                                        parent_si->family,
3707                                        &in_addr.sa.s,
3708                                        &in_addr.sa_socklen);
3709         if (ret == -1) {
3710                 int saved_errno = errno;
3711                 SWRAP_UNLOCK_SI(parent_si);
3712                 libc_close(fd);
3713                 errno = saved_errno;
3714                 return ret;
3715         }
3716
3717         child_si = &new_si;
3718
3719         child_si->family = parent_si->family;
3720         child_si->type = parent_si->type;
3721         child_si->protocol = parent_si->protocol;
3722         child_si->bound = 1;
3723         child_si->is_server = 1;
3724         child_si->connected = 1;
3725
3726         SWRAP_UNLOCK_SI(parent_si);
3727
3728         child_si->peername = (struct swrap_address) {
3729                 .sa_socklen = in_addr.sa_socklen,
3730         };
3731         memcpy(&child_si->peername.sa.ss, &in_addr.sa.ss, in_addr.sa_socklen);
3732
3733         if (addr != NULL && addrlen != NULL) {
3734                 size_t copy_len = MIN(*addrlen, in_addr.sa_socklen);
3735                 if (copy_len > 0) {
3736                         memcpy(addr, &in_addr.sa.ss, copy_len);
3737                 }
3738                 *addrlen = in_addr.sa_socklen;
3739         }
3740
3741         ret = sockaddr_convert_from_un(child_si,
3742                                        &un_my_addr.sa.un,
3743                                        un_my_addr.sa_socklen,
3744                                        child_si->family,
3745                                        &in_my_addr.sa.s,
3746                                        &in_my_addr.sa_socklen);
3747         if (ret == -1) {
3748                 int saved_errno = errno;
3749                 libc_close(fd);
3750                 errno = saved_errno;
3751                 return ret;
3752         }
3753
3754         SWRAP_LOG(SWRAP_LOG_TRACE,
3755                   "accept() path=%s, fd=%d",
3756                   un_my_addr.sa.un.sun_path, s);
3757
3758         child_si->myname = (struct swrap_address) {
3759                 .sa_socklen = in_my_addr.sa_socklen,
3760         };
3761         memcpy(&child_si->myname.sa.ss, &in_my_addr.sa.ss, in_my_addr.sa_socklen);
3762
3763         idx = swrap_create_socket(&new_si, fd);
3764         if (idx == -1) {
3765                 int saved_errno = errno;
3766                 libc_close(fd);
3767                 errno = saved_errno;
3768                 return -1;
3769         }
3770
3771         if (addr != NULL) {
3772                 struct socket_info *si = swrap_get_socket_info(idx);
3773
3774                 SWRAP_LOCK_SI(si);
3775                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_SEND, NULL, 0);
3776                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_RECV, NULL, 0);
3777                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_ACK, NULL, 0);
3778                 SWRAP_UNLOCK_SI(si);
3779         }
3780
3781         return fd;
3782 }
3783
#ifdef HAVE_ACCEPT4
/*
 * Exported accept4() symbol: hands the call, including the caller's
 * flags, to swrap_accept() and returns whatever it returns.
 */
int accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags)
{
        int new_fd = swrap_accept(s, addr, addrlen, flags);

        return new_fd;
}
#endif /* HAVE_ACCEPT4 */
3790
/*
 * Exported accept() symbol: same as accept4() with no flags. The length
 * argument is declared as Psocklen_t on platforms that define
 * HAVE_ACCEPT_PSOCKLEN_T, hence the cast before delegating.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int accept(int s, struct sockaddr *addr, Psocklen_t addrlen)
#else
int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)addrlen;

        return swrap_accept(s, addr, len_ptr, 0);
}
3799
3800 static int autobind_start_init;
3801 static int autobind_start;
3802
3803 /* using sendto() or connect() on an unbound socket would give the
3804    recipient no way to reply, as unlike UDP and TCP, a unix domain
3805    socket can't auto-assign ephemeral port numbers, so we need to
3806    assign it here.
3807    Note: this might change the family from ipv6 to ipv4
3808 */
3809 static int swrap_auto_bind(int fd, struct socket_info *si, int family)
3810 {
3811         struct swrap_address un_addr = {
3812                 .sa_socklen = sizeof(struct sockaddr_un),
3813         };
3814         int i;
3815         char type;
3816         int ret;
3817         int port;
3818         struct stat st;
3819         char *swrap_dir = NULL;
3820
3821         swrap_mutex_lock(&autobind_start_mutex);
3822
3823         if (autobind_start_init != 1) {
3824                 autobind_start_init = 1;
3825                 autobind_start = getpid();
3826                 autobind_start %= 50000;
3827                 autobind_start += 10000;
3828         }
3829
3830         un_addr.sa.un.sun_family = AF_UNIX;
3831
3832         switch (family) {
3833         case AF_INET: {
3834                 struct sockaddr_in in;
3835
3836                 switch (si->type) {
3837                 case SOCK_STREAM:
3838                         type = SOCKET_TYPE_CHAR_TCP;
3839                         break;
3840                 case SOCK_DGRAM:
3841                         type = SOCKET_TYPE_CHAR_UDP;
3842                         break;
3843                 default:
3844                         errno = ESOCKTNOSUPPORT;
3845                         ret = -1;
3846                         goto done;
3847                 }
3848
3849                 memset(&in, 0, sizeof(in));
3850                 in.sin_family = AF_INET;
3851                 in.sin_addr.s_addr = htonl(swrap_ipv4_iface(
3852                                            socket_wrapper_default_iface()));
3853
3854                 si->myname = (struct swrap_address) {
3855                         .sa_socklen = sizeof(in),
3856                 };
3857                 memcpy(&si->myname.sa.in, &in, si->myname.sa_socklen);
3858                 break;
3859         }
3860 #ifdef HAVE_IPV6
3861         case AF_INET6: {
3862                 struct sockaddr_in6 in6;
3863
3864                 if (si->family != family) {
3865                         errno = ENETUNREACH;
3866                         ret = -1;
3867                         goto done;
3868                 }
3869
3870                 switch (si->type) {
3871                 case SOCK_STREAM:
3872                         type = SOCKET_TYPE_CHAR_TCP_V6;
3873                         break;
3874                 case SOCK_DGRAM:
3875                         type = SOCKET_TYPE_CHAR_UDP_V6;
3876                         break;
3877                 default:
3878                         errno = ESOCKTNOSUPPORT;
3879                         ret = -1;
3880                         goto done;
3881                 }
3882
3883                 memset(&in6, 0, sizeof(in6));
3884                 in6.sin6_family = AF_INET6;
3885                 in6.sin6_addr = *swrap_ipv6();
3886                 in6.sin6_addr.s6_addr[15] = socket_wrapper_default_iface();
3887
3888                 si->myname = (struct swrap_address) {
3889                         .sa_socklen = sizeof(in6),
3890                 };
3891                 memcpy(&si->myname.sa.in6, &in6, si->myname.sa_socklen);
3892                 break;
3893         }
3894 #endif
3895         default:
3896                 errno = ESOCKTNOSUPPORT;
3897                 ret = -1;
3898                 goto done;
3899         }
3900
3901         if (autobind_start > 60000) {
3902                 autobind_start = 10000;
3903         }
3904
3905         swrap_dir = socket_wrapper_dir();
3906         if (swrap_dir == NULL) {
3907                 errno = EINVAL;
3908                 ret = -1;
3909                 goto done;
3910         }
3911
3912         for (i = 0; i < SOCKET_MAX_SOCKETS; i++) {
3913                 port = autobind_start + i;
3914                 swrap_un_path(&un_addr.sa.un,
3915                               swrap_dir,
3916                               type,
3917                               socket_wrapper_default_iface(),
3918                               port);
3919                 if (stat(un_addr.sa.un.sun_path, &st) == 0) continue;
3920
3921                 ret = libc_bind(fd, &un_addr.sa.s, un_addr.sa_socklen);
3922                 if (ret == -1) {
3923                         goto done;
3924                 }
3925
3926                 si->un_addr = un_addr.sa.un;
3927
3928                 si->bound = 1;
3929                 autobind_start = port + 1;
3930                 break;
3931         }
3932         if (i == SOCKET_MAX_SOCKETS) {
3933                 SWRAP_LOG(SWRAP_LOG_ERROR, "Too many open unix sockets (%u) for "
3934                                            "interface "SOCKET_FORMAT,
3935                                            SOCKET_MAX_SOCKETS,
3936                                            type,
3937                                            socket_wrapper_default_iface(),
3938                                            0);
3939                 errno = ENFILE;
3940                 ret = -1;
3941                 goto done;
3942         }
3943
3944         si->family = family;
3945         set_port(si->family, port, &si->myname);
3946
3947         ret = 0;
3948
3949 done:
3950         SAFE_FREE(swrap_dir);
3951         swrap_mutex_unlock(&autobind_start_mutex);
3952         return ret;
3953 }
3954
3955 /****************************************************************************
3956  *   CONNECT
3957  ***************************************************************************/
3958
3959 static int swrap_connect(int s, const struct sockaddr *serv_addr,
3960                          socklen_t addrlen)
3961 {
3962         int ret;
3963         struct swrap_address un_addr = {
3964                 .sa_socklen = sizeof(struct sockaddr_un),
3965         };
3966         struct socket_info *si = find_socket_info(s);
3967         int bcast = 0;
3968
3969         if (!si) {
3970                 return libc_connect(s, serv_addr, addrlen);
3971         }
3972
3973         SWRAP_LOCK_SI(si);
3974
3975         if (si->bound == 0) {
3976                 ret = swrap_auto_bind(s, si, serv_addr->sa_family);
3977                 if (ret == -1) {
3978                         goto done;
3979                 }
3980         }
3981
3982         if (si->family != serv_addr->sa_family) {
3983                 SWRAP_LOG(SWRAP_LOG_ERROR,
3984                           "called for fd=%d (family=%d) called with invalid family=%d",
3985                           s, si->family, serv_addr->sa_family);
3986                 errno = EINVAL;
3987                 ret = -1;
3988                 goto done;
3989         }
3990
3991         ret = sockaddr_convert_to_un(si, serv_addr,
3992                                      addrlen, &un_addr.sa.un, 0, &bcast);
3993         if (ret == -1) {
3994                 goto done;
3995         }
3996
3997         if (bcast) {
3998                 errno = ENETUNREACH;
3999                 ret = -1;
4000                 goto done;
4001         }
4002
4003         if (si->type == SOCK_DGRAM) {
4004                 si->defer_connect = 1;
4005                 ret = 0;
4006         } else {
4007                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_SEND, NULL, 0);
4008
4009                 ret = libc_connect(s,
4010                                    &un_addr.sa.s,
4011                                    un_addr.sa_socklen);
4012         }
4013
4014         SWRAP_LOG(SWRAP_LOG_TRACE,
4015                   "connect() path=%s, fd=%d",
4016                   un_addr.sa.un.sun_path, s);
4017
4018
4019         /* to give better errors */
4020         if (ret == -1 && errno == ENOENT) {
4021                 errno = EHOSTUNREACH;
4022         }
4023
4024         if (ret == 0) {
4025                 si->peername = (struct swrap_address) {
4026                         .sa_socklen = addrlen,
4027                 };
4028
4029                 memcpy(&si->peername.sa.ss, serv_addr, addrlen);
4030                 si->connected = 1;
4031
4032                 /*
4033                  * When we connect() on a socket than we have to bind the
4034                  * outgoing connection on the interface we use for the
4035                  * transport. We already bound it on the right interface
4036                  * but here we have to update the name so getsockname()
4037                  * returns correct information.
4038                  */
4039                 if (si->bindname.sa_socklen > 0) {
4040                         si->myname = (struct swrap_address) {
4041                                 .sa_socklen = si->bindname.sa_socklen,
4042                         };
4043
4044                         memcpy(&si->myname.sa.ss,
4045                                &si->bindname.sa.ss,
4046                                si->bindname.sa_socklen);
4047
4048                         /* Cleanup bindname */
4049                         si->bindname = (struct swrap_address) {
4050                                 .sa_socklen = 0,
4051                         };
4052                 }
4053
4054                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_RECV, NULL, 0);
4055                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_ACK, NULL, 0);
4056         } else {
4057                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_UNREACH, NULL, 0);
4058         }
4059
4060 done:
4061         SWRAP_UNLOCK_SI(si);
4062         return ret;
4063 }
4064
/* Exported connect() symbol; all work is done by swrap_connect(). */
int connect(int s, const struct sockaddr *serv_addr, socklen_t addrlen)
{
        int rc = swrap_connect(s, serv_addr, addrlen);

        return rc;
}
4069
4070 /****************************************************************************
4071  *   BIND
4072  ***************************************************************************/
4073
4074 static int swrap_bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
4075 {
4076         int ret;
4077         struct swrap_address un_addr = {
4078                 .sa_socklen = sizeof(struct sockaddr_un),
4079         };
4080         struct socket_info *si = find_socket_info(s);
4081         int bind_error = 0;
4082 #if 0 /* FIXME */
4083         bool in_use;
4084 #endif
4085
4086         if (!si) {
4087                 return libc_bind(s, myaddr, addrlen);
4088         }
4089
4090         SWRAP_LOCK_SI(si);
4091
4092         switch (si->family) {
4093         case AF_INET: {
4094                 const struct sockaddr_in *sin;
4095                 if (addrlen < sizeof(struct sockaddr_in)) {
4096                         bind_error = EINVAL;
4097                         break;
4098                 }
4099
4100                 sin = (const struct sockaddr_in *)(const void *)myaddr;
4101
4102                 if (sin->sin_family != AF_INET) {
4103                         bind_error = EAFNOSUPPORT;
4104                 }
4105
4106                 /* special case for AF_UNSPEC */
4107                 if (sin->sin_family == AF_UNSPEC &&
4108                     (sin->sin_addr.s_addr == htonl(INADDR_ANY)))
4109                 {
4110                         bind_error = 0;
4111                 }
4112
4113                 break;
4114         }
4115 #ifdef HAVE_IPV6
4116         case AF_INET6: {
4117                 const struct sockaddr_in6 *sin6;
4118                 if (addrlen < sizeof(struct sockaddr_in6)) {
4119                         bind_error = EINVAL;
4120                         break;
4121                 }
4122
4123                 sin6 = (const struct sockaddr_in6 *)(const void *)myaddr;
4124
4125                 if (sin6->sin6_family != AF_INET6) {
4126                         bind_error = EAFNOSUPPORT;
4127                 }
4128
4129                 break;
4130         }
4131 #endif
4132         default:
4133                 bind_error = EINVAL;
4134                 break;
4135         }
4136
4137         if (bind_error != 0) {
4138                 errno = bind_error;
4139                 ret = -1;
4140                 goto out;
4141         }
4142
4143 #if 0 /* FIXME */
4144         in_use = check_addr_port_in_use(myaddr, addrlen);
4145         if (in_use) {
4146                 errno = EADDRINUSE;
4147                 ret = -1;
4148                 goto out;
4149         }
4150 #endif
4151
4152         si->myname.sa_socklen = addrlen;
4153         memcpy(&si->myname.sa.ss, myaddr, addrlen);
4154
4155         ret = sockaddr_convert_to_un(si,
4156                                      myaddr,
4157                                      addrlen,
4158                                      &un_addr.sa.un,
4159                                      1,
4160                                      &si->bcast);
4161         if (ret == -1) {
4162                 goto out;
4163         }
4164
4165         unlink(un_addr.sa.un.sun_path);
4166
4167         ret = libc_bind(s, &un_addr.sa.s, un_addr.sa_socklen);
4168
4169         SWRAP_LOG(SWRAP_LOG_TRACE,
4170                   "bind() path=%s, fd=%d",
4171                   un_addr.sa.un.sun_path, s);
4172
4173         if (ret == 0) {
4174                 si->bound = 1;
4175         }
4176
4177 out:
4178         SWRAP_UNLOCK_SI(si);
4179
4180         return ret;
4181 }
4182
/* Exported bind() symbol; all work is done by swrap_bind(). */
int bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
{
        int rc = swrap_bind(s, myaddr, addrlen);

        return rc;
}
4187
4188 /****************************************************************************
4189  *   BINDRESVPORT
4190  ***************************************************************************/
4191
4192 #ifdef HAVE_BINDRESVPORT
4193 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen);
4194
4195 static int swrap_bindresvport_sa(int sd, struct sockaddr *sa)
4196 {
4197         struct swrap_address myaddr = {
4198                 .sa_socklen = sizeof(struct sockaddr_storage),
4199         };
4200         socklen_t salen;
4201         static uint16_t port;
4202         uint16_t i;
4203         int rc = -1;
4204         int af;
4205
4206 #define SWRAP_STARTPORT 600
4207 #define SWRAP_ENDPORT (IPPORT_RESERVED - 1)
4208 #define SWRAP_NPORTS (SWRAP_ENDPORT - SWRAP_STARTPORT + 1)
4209
4210         if (port == 0) {
4211                 port = (getpid() % SWRAP_NPORTS) + SWRAP_STARTPORT;
4212         }
4213
4214         if (sa == NULL) {
4215                 salen = myaddr.sa_socklen;
4216                 sa = &myaddr.sa.s;
4217
4218                 rc = swrap_getsockname(sd, &myaddr.sa.s, &salen);
4219                 if (rc < 0) {
4220                         return -1;
4221                 }
4222
4223                 af = sa->sa_family;
4224                 memset(&myaddr.sa.ss, 0, salen);
4225         } else {
4226                 af = sa->sa_family;
4227         }
4228
4229         for (i = 0; i < SWRAP_NPORTS; i++, port++) {
4230                 switch(af) {
4231                 case AF_INET: {
4232                         struct sockaddr_in *sinp = (struct sockaddr_in *)(void *)sa;
4233
4234                         salen = sizeof(struct sockaddr_in);
4235                         sinp->sin_port = htons(port);
4236                         break;
4237                 }
4238                 case AF_INET6: {
4239                         struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)(void *)sa;
4240
4241                         salen = sizeof(struct sockaddr_in6);
4242                         sin6p->sin6_port = htons(port);
4243                         break;
4244                 }
4245                 default:
4246                         errno = EAFNOSUPPORT;
4247                         return -1;
4248                 }
4249                 sa->sa_family = af;
4250
4251                 if (port > SWRAP_ENDPORT) {
4252                         port = SWRAP_STARTPORT;
4253                 }
4254
4255                 rc = swrap_bind(sd, (struct sockaddr *)sa, salen);
4256                 if (rc == 0 || errno != EADDRINUSE) {
4257                         break;
4258                 }
4259         }
4260
4261         return rc;
4262 }
4263
/*
 * Exported bindresvport() symbol: passes the IPv4 address (which may be
 * NULL) as a generic sockaddr to swrap_bindresvport_sa().
 */
int bindresvport(int sockfd, struct sockaddr_in *sinp)
{
        struct sockaddr *generic = (struct sockaddr *)sinp;

        return swrap_bindresvport_sa(sockfd, generic);
}
4268 #endif
4269
4270 /****************************************************************************
4271  *   LISTEN
4272  ***************************************************************************/
4273
4274 static int swrap_listen(int s, int backlog)
4275 {
4276         int ret;
4277         struct socket_info *si = find_socket_info(s);
4278
4279         if (!si) {
4280                 return libc_listen(s, backlog);
4281         }
4282
4283         SWRAP_LOCK_SI(si);
4284
4285         if (si->bound == 0) {
4286                 ret = swrap_auto_bind(s, si, si->family);
4287                 if (ret == -1) {
4288                         errno = EADDRINUSE;
4289                         goto out;
4290                 }
4291         }
4292
4293         ret = libc_listen(s, backlog);
4294         if (ret == 0) {
4295                 si->listening = 1;
4296         }
4297
4298 out:
4299         SWRAP_UNLOCK_SI(si);
4300
4301         return ret;
4302 }
4303
/* Exported listen() symbol; all work is done by swrap_listen(). */
int listen(int s, int backlog)
{
        int rc = swrap_listen(s, backlog);

        return rc;
}
4308
4309 /****************************************************************************
4310  *   FOPEN
4311  ***************************************************************************/
4312
/*
 * fopen() implementation: opens the stream with libc_fopen() and, on
 * success, removes any stale socket bookkeeping for the descriptor that
 * backs the new stream. Returns the stream, or NULL on failure.
 */
static FILE *swrap_fopen(const char *name, const char *mode)
{
        FILE *stream = libc_fopen(name, mode);

        if (stream == NULL) {
                return NULL;
        }

        swrap_remove_stale(fileno(stream));

        return stream;
}
4326
/* Exported fopen() symbol; all work is done by swrap_fopen(). */
FILE *fopen(const char *name, const char *mode)
{
        FILE *stream = swrap_fopen(name, mode);

        return stream;
}
4331
4332 /****************************************************************************
4333  *   FOPEN64
4334  ***************************************************************************/
4335
4336 #ifdef HAVE_FOPEN64
/*
 * fopen64() implementation: opens the stream with libc_fopen64() and, on
 * success, removes any stale socket bookkeeping for the descriptor that
 * backs the new stream. Returns the stream, or NULL on failure.
 */
static FILE *swrap_fopen64(const char *name, const char *mode)
{
        FILE *stream = libc_fopen64(name, mode);

        if (stream == NULL) {
                return NULL;
        }

        swrap_remove_stale(fileno(stream));

        return stream;
}
4350
/* Exported fopen64() symbol; all work is done by swrap_fopen64(). */
FILE *fopen64(const char *name, const char *mode)
{
        FILE *stream = swrap_fopen64(name, mode);

        return stream;
}
4355 #endif /* HAVE_FOPEN64 */
4356
4357 /****************************************************************************
4358  *   OPEN
4359  ***************************************************************************/
4360
/*
 * open() implementation taking the optional arguments as a va_list.
 * Returns the descriptor from libc_vopen(), or -1 on failure.
 */
static int swrap_vopen(const char *pathname, int flags, va_list ap)
{
        int new_fd = libc_vopen(pathname, flags, ap);

        if (new_fd == -1) {
                return new_fd;
        }

        /*
         * Descriptors can be closed behind our back (libc-internal code
         * paths, direct syscalls), which we cannot intercept. If this
         * number is still tracked from such a close, recover by dropping
         * the stale entry now.
         */
        swrap_remove_stale(new_fd);

        return new_fd;
}
4377
/*
 * Exported open() symbol: collects the variadic arguments and forwards
 * them to swrap_vopen().
 */
int open(const char *pathname, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = swrap_vopen(pathname, flags, args);
        va_end(args);

        return result;
}
4389
4390 /****************************************************************************
4391  *   OPEN64
4392  ***************************************************************************/
4393
4394 #ifdef HAVE_OPEN64
/*
 * open64() implementation taking the optional arguments as a va_list.
 * Returns the descriptor from libc_vopen64(), or -1 on failure.
 */
static int swrap_vopen64(const char *pathname, int flags, va_list ap)
{
        int new_fd = libc_vopen64(pathname, flags, ap);

        if (new_fd == -1) {
                return new_fd;
        }

        /*
         * Descriptors can be closed behind our back (libc-internal code
         * paths, direct syscalls), which we cannot intercept. If this
         * number is still tracked from such a close, recover by dropping
         * the stale entry now.
         */
        swrap_remove_stale(new_fd);

        return new_fd;
}
4411
/*
 * Exported open64() symbol: collects the variadic arguments and forwards
 * them to swrap_vopen64().
 */
int open64(const char *pathname, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = swrap_vopen64(pathname, flags, args);
        va_end(args);

        return result;
}
4423 #endif /* HAVE_OPEN64 */
4424
4425 /****************************************************************************
4426  *   OPENAT
4427  ***************************************************************************/
4428
/*
 * openat() implementation taking the optional arguments as a va_list.
 * Returns the descriptor from libc_vopenat(), or -1 on failure.
 */
static int swrap_vopenat(int dirfd, const char *path, int flags, va_list ap)
{
        int new_fd = libc_vopenat(dirfd, path, flags, ap);

        if (new_fd == -1) {
                return new_fd;
        }

        /*
         * Descriptors can be closed behind our back (libc-internal code
         * paths, direct syscalls), which we cannot intercept. If this
         * number is still tracked from such a close, recover by dropping
         * the stale entry now.
         */
        swrap_remove_stale(new_fd);

        return new_fd;
}
4446
/*
 * Exported openat() symbol: collects the variadic arguments and forwards
 * them to swrap_vopenat().
 */
int openat(int dirfd, const char *path, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = swrap_vopenat(dirfd, path, flags, args);
        va_end(args);

        return result;
}
4458
4459 /****************************************************************************
4460  *   GETPEERNAME
4461  ***************************************************************************/
4462
4463 static int swrap_getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
4464 {
4465         struct socket_info *si = find_socket_info(s);
4466         socklen_t len;
4467         int ret = -1;
4468
4469         if (!si) {
4470                 return libc_getpeername(s, name, addrlen);
4471         }
4472
4473         SWRAP_LOCK_SI(si);
4474
4475         if (si->peername.sa_socklen == 0)
4476         {
4477                 errno = ENOTCONN;
4478                 goto out;
4479         }
4480
4481         len = MIN(*addrlen, si->peername.sa_socklen);
4482         if (len == 0) {
4483                 ret = 0;
4484                 goto out;
4485         }
4486
4487         memcpy(name, &si->peername.sa.ss, len);
4488         *addrlen = si->peername.sa_socklen;
4489
4490         ret = 0;
4491 out:
4492         SWRAP_UNLOCK_SI(si);
4493
4494         return ret;
4495 }
4496
/*
 * Exported getpeername() symbol. The length argument is declared as
 * Psocklen_t on platforms that define HAVE_ACCEPT_PSOCKLEN_T, hence the
 * cast before delegating to swrap_getpeername().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getpeername(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)addrlen;

        return swrap_getpeername(s, name, len_ptr);
}
4505
4506 /****************************************************************************
4507  *   GETSOCKNAME
4508  ***************************************************************************/
4509
4510 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
4511 {
4512         struct socket_info *si = find_socket_info(s);
4513         socklen_t len;
4514         int ret = -1;
4515
4516         if (!si) {
4517                 return libc_getsockname(s, name, addrlen);
4518         }
4519
4520         SWRAP_LOCK_SI(si);
4521
4522         len = MIN(*addrlen, si->myname.sa_socklen);
4523         if (len == 0) {
4524                 ret = 0;
4525                 goto out;
4526         }
4527
4528         memcpy(name, &si->myname.sa.ss, len);
4529         *addrlen = si->myname.sa_socklen;
4530
4531         ret = 0;
4532 out:
4533         SWRAP_UNLOCK_SI(si);
4534
4535         return ret;
4536 }
4537
/*
 * Exported getsockname() symbol. The length argument is declared as
 * Psocklen_t on platforms that define HAVE_ACCEPT_PSOCKLEN_T, hence the
 * cast before delegating to swrap_getsockname().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockname(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)addrlen;

        return swrap_getsockname(s, name, len_ptr);
}
4546
4547 /****************************************************************************
4548  *   GETSOCKOPT
4549  ***************************************************************************/
4550
/*
 * SO_PROTOTYPE is the Solaris name for SO_PROTOCOL; provide the alias
 * when only the Solaris name is available.
 */
#if !defined(SO_PROTOCOL) && defined(SO_PROTOTYPE)
# define SO_PROTOCOL SO_PROTOTYPE
#endif /* !SO_PROTOCOL && SO_PROTOTYPE */
4556
4557 static int swrap_getsockopt(int s, int level, int optname,
4558                             void *optval, socklen_t *optlen)
4559 {
4560         struct socket_info *si = find_socket_info(s);
4561         int ret;
4562
4563         if (!si) {
4564                 return libc_getsockopt(s,
4565                                        level,
4566                                        optname,
4567                                        optval,
4568                                        optlen);
4569         }
4570
4571         SWRAP_LOCK_SI(si);
4572
4573         if (level == SOL_SOCKET) {
4574                 switch (optname) {
4575 #ifdef SO_DOMAIN
4576                 case SO_DOMAIN:
4577                         if (optval == NULL || optlen == NULL ||
4578                             *optlen < (socklen_t)sizeof(int)) {
4579                                 errno = EINVAL;
4580                                 ret = -1;
4581                                 goto done;
4582                         }
4583
4584                         *optlen = sizeof(int);
4585                         *(int *)optval = si->family;
4586                         ret = 0;
4587                         goto done;
4588 #endif /* SO_DOMAIN */
4589
4590 #ifdef SO_PROTOCOL
4591                 case SO_PROTOCOL:
4592                         if (optval == NULL || optlen == NULL ||
4593                             *optlen < (socklen_t)sizeof(int)) {
4594                                 errno = EINVAL;
4595                                 ret = -1;
4596                                 goto done;
4597                         }
4598
4599                         *optlen = sizeof(int);
4600                         *(int *)optval = si->protocol;
4601                         ret = 0;
4602                         goto done;
4603 #endif /* SO_PROTOCOL */
4604                 case SO_TYPE:
4605                         if (optval == NULL || optlen == NULL ||
4606                             *optlen < (socklen_t)sizeof(int)) {
4607                                 errno = EINVAL;
4608                                 ret = -1;
4609                                 goto done;
4610                         }
4611
4612                         *optlen = sizeof(int);
4613                         *(int *)optval = si->type;
4614                         ret = 0;
4615                         goto done;
4616                 default:
4617                         ret = libc_getsockopt(s,
4618                                               level,
4619                                               optname,
4620                                               optval,
4621                                               optlen);
4622                         goto done;
4623                 }
4624         } else if (level == IPPROTO_TCP) {
4625                 switch (optname) {
4626 #ifdef TCP_NODELAY
4627                 case TCP_NODELAY:
4628                         /*
4629                          * This enables sending packets directly out over TCP.
4630                          * As a unix socket is doing that any way, report it as
4631                          * enabled.
4632                          */
4633                         if (optval == NULL || optlen == NULL ||
4634                             *optlen < (socklen_t)sizeof(int)) {
4635                                 errno = EINVAL;
4636                                 ret = -1;
4637                                 goto done;
4638                         }
4639
4640                         *optlen = sizeof(int);
4641                         *(int *)optval = si->tcp_nodelay;
4642
4643                         ret = 0;
4644                         goto done;
4645 #endif /* TCP_NODELAY */
4646 #ifdef TCP_INFO
4647                 case TCP_INFO: {
4648                         struct tcp_info info;
4649                         socklen_t ilen = sizeof(info);
4650
4651 #ifdef HAVE_NETINET_TCP_FSM_H
4652 /* This is FreeBSD */
4653 # define __TCP_LISTEN TCPS_LISTEN
4654 # define __TCP_ESTABLISHED TCPS_ESTABLISHED
4655 # define __TCP_CLOSE TCPS_CLOSED
4656 #else
4657 /* This is Linux */
4658 # define __TCP_LISTEN TCP_LISTEN
4659 # define __TCP_ESTABLISHED TCP_ESTABLISHED
4660 # define __TCP_CLOSE TCP_CLOSE
4661 #endif
4662
4663                         ZERO_STRUCT(info);
4664                         if (si->listening) {
4665                                 info.tcpi_state = __TCP_LISTEN;
4666                         } else if (si->connected) {
4667                                 /*
4668                                  * For now we just fake a few values
4669                                  * supported both by FreeBSD and Linux
4670                                  */
4671                                 info.tcpi_state = __TCP_ESTABLISHED;
4672                                 info.tcpi_rto = 200000;  /* 200 msec */
4673                                 info.tcpi_rtt = 5000;    /* 5 msec */
4674                                 info.tcpi_rttvar = 5000; /* 5 msec */
4675                         } else {
4676                                 info.tcpi_state = __TCP_CLOSE;
4677                                 info.tcpi_rto = 1000000;  /* 1 sec */
4678                                 info.tcpi_rtt = 0;
4679                                 info.tcpi_rttvar = 250000; /* 250 msec */
4680                         }
4681
4682                         if (optval == NULL || optlen == NULL ||
4683                             *optlen < (socklen_t)ilen) {
4684                                 errno = EINVAL;
4685                                 ret = -1;
4686                                 goto done;
4687                         }
4688
4689                         *optlen = ilen;
4690                         memcpy(optval, &info, ilen);
4691
4692                         ret = 0;
4693                         goto done;
4694                 }
4695 #endif /* TCP_INFO */
4696                 default:
4697                         break;
4698                 }
4699         }
4700
4701         errno = ENOPROTOOPT;
4702         ret = -1;
4703
4704 done:
4705         SWRAP_UNLOCK_SI(si);
4706         return ret;
4707 }
4708
/*
 * Exported getsockopt() entry point.
 *
 * Depending on the platform the length argument is declared either as
 * Psocklen_t or as socklen_t *; it is converted to socklen_t * and the
 * call is forwarded unchanged to swrap_getsockopt().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockopt(int s, int level, int optname, void *optval, Psocklen_t optlen)
#else
int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)optlen;

	return swrap_getsockopt(s, level, optname, optval, len_ptr);
}
4717
4718 /****************************************************************************
4719  *   SETSOCKOPT
4720  ***************************************************************************/
4721
4722 static int swrap_setsockopt(int s, int level, int optname,
4723                             const void *optval, socklen_t optlen)
4724 {
4725         struct socket_info *si = find_socket_info(s);
4726         int ret;
4727
4728         if (!si) {
4729                 return libc_setsockopt(s,
4730                                        level,
4731                                        optname,
4732                                        optval,
4733                                        optlen);
4734         }
4735
4736         if (level == SOL_SOCKET) {
4737                 return libc_setsockopt(s,
4738                                        level,
4739                                        optname,
4740                                        optval,
4741                                        optlen);
4742         }
4743
4744         SWRAP_LOCK_SI(si);
4745
4746         if (level == IPPROTO_TCP) {
4747                 switch (optname) {
4748 #ifdef TCP_NODELAY
4749                 case TCP_NODELAY: {
4750                         int i;
4751
4752                         /*
4753                          * This enables sending packets directly out over TCP.
4754                          * A unix socket is doing that any way.
4755                          */
4756                         if (optval == NULL || optlen == 0 ||
4757                             optlen < (socklen_t)sizeof(int)) {
4758                                 errno = EINVAL;
4759                                 ret = -1;
4760                                 goto done;
4761                         }
4762
4763                         i = *discard_const_p(int, optval);
4764                         if (i != 0 && i != 1) {
4765                                 errno = EINVAL;
4766                                 ret = -1;
4767                                 goto done;
4768                         }
4769                         si->tcp_nodelay = i;
4770
4771                         ret = 0;
4772                         goto done;
4773                 }
4774 #endif /* TCP_NODELAY */
4775                 default:
4776                         break;
4777                 }
4778         }
4779
4780         switch (si->family) {
4781         case AF_INET:
4782                 if (level == IPPROTO_IP) {
4783 #ifdef IP_PKTINFO
4784                         if (optname == IP_PKTINFO) {
4785                                 si->pktinfo = AF_INET;
4786                         }
4787 #endif /* IP_PKTINFO */
4788                 }
4789                 ret = 0;
4790                 goto done;
4791 #ifdef HAVE_IPV6
4792         case AF_INET6:
4793                 if (level == IPPROTO_IPV6) {
4794 #ifdef IPV6_RECVPKTINFO
4795                         if (optname == IPV6_RECVPKTINFO) {
4796                                 si->pktinfo = AF_INET6;
4797                         }
4798 #endif /* IPV6_PKTINFO */
4799                 }
4800                 ret = 0;
4801                 goto done;
4802 #endif
4803         default:
4804                 errno = ENOPROTOOPT;
4805                 ret = -1;
4806                 goto done;
4807         }
4808
4809 done:
4810         SWRAP_UNLOCK_SI(si);
4811         return ret;
4812 }
4813
/* Exported setsockopt() entry point; forwards to swrap_setsockopt(). */
int setsockopt(int s, int level, int optname,
	       const void *optval, socklen_t optlen)
{
	int rc = swrap_setsockopt(s, level, optname, optval, optlen);

	return rc;
}
4819
4820 /****************************************************************************
4821  *   IOCTL
4822  ***************************************************************************/
4823
4824 static int swrap_vioctl(int s, unsigned long int r, va_list va)
4825 {
4826         struct socket_info *si = find_socket_info(s);
4827         va_list ap;
4828         int *value_ptr = NULL;
4829         int rc;
4830
4831         if (!si) {
4832                 return libc_vioctl(s, r, va);
4833         }
4834
4835         SWRAP_LOCK_SI(si);
4836
4837         va_copy(ap, va);
4838
4839         rc = libc_vioctl(s, r, va);
4840
4841         switch (r) {
4842         case FIONREAD:
4843                 if (rc == 0) {
4844                         value_ptr = ((int *)va_arg(ap, int *));
4845                 }
4846
4847                 if (rc == -1 && errno != EAGAIN && errno != ENOBUFS) {
4848                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4849                 } else if (value_ptr != NULL && *value_ptr == 0) { /* END OF FILE */
4850                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4851                 }
4852                 break;
4853 #ifdef FIONWRITE
4854         case FIONWRITE:
4855                 /* this is FreeBSD */
4856                 FALL_THROUGH; /* to TIOCOUTQ */
4857 #endif /* FIONWRITE */
4858         case TIOCOUTQ: /* same as SIOCOUTQ on Linux */
4859                 /*
4860                  * This may return more bytes then the application
4861                  * sent into the socket, for tcp it should
4862                  * return the number of unacked bytes.
4863                  *
4864                  * On AF_UNIX, all bytes are immediately acked!
4865                  */
4866                 if (rc == 0) {
4867                         value_ptr = ((int *)va_arg(ap, int *));
4868                         *value_ptr = 0;
4869                 }
4870                 break;
4871         }
4872
4873         va_end(ap);
4874
4875         SWRAP_UNLOCK_SI(si);
4876         return rc;
4877 }
4878
/*
 * Exported ioctl() entry point.
 *
 * The request type differs between platforms (int vs. unsigned long), so
 * it is widened to unsigned long before the variadic arguments are passed
 * on to swrap_vioctl() as a va_list.
 */
#ifdef HAVE_IOCTL_INT
int ioctl(int s, int r, ...)
#else
int ioctl(int s, unsigned long int r, ...)
#endif
{
	unsigned long int request = (unsigned long int)r;
	va_list va;
	int rc;

	va_start(va, r);
	rc = swrap_vioctl(s, request, va);
	va_end(va);

	return rc;
}
4896
4897 /*****************
4898  * CMSG
4899  *****************/
4900
4901 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
4902
/*
 * Fallback for platforms whose headers do not provide CMSG_ALIGN():
 * use the BSD _ALIGN() macro when it exists, otherwise round the length
 * up to the next multiple of sizeof(size_t).
 */
#if !defined(CMSG_ALIGN)
# if defined(_ALIGN) /* BSD */
#  define CMSG_ALIGN _ALIGN
# else
#  define CMSG_ALIGN(len) \
	(((len) + sizeof(size_t) - 1) & ~(sizeof(size_t) - 1))
# endif /* _ALIGN */
#endif /* CMSG_ALIGN */
4910
/**
 * @brief Append one control message (cmsghdr) to a msghdr.
 *
 * This works for any type of cmsghdr. The element is written to
 * msg->msg_control, and msg->msg_control / msg->msg_controllen are then
 * moved to the buffer position after the added element. Because the
 * msghdr is modified like this, the function is intended to be used with
 * an intermediate msghdr and not on the original one handed in by the
 * client.
 *
 * If the remaining control buffer is smaller than the element, the
 * element is truncated to the available size and MSG_CTRUNC is set in
 * msg->msg_flags.
 *
 * @param[in,out] msg   The msghdr to which to add the cmsg.
 *
 * @param[in]  level    The cmsg level to set.
 *
 * @param[in]  type     The cmsg type to set.
 *
 * @param[in]  data     The cmsg data to set.
 *
 * @param[in]  len      The length of the data to set.
 */
static void swrap_msghdr_add_cmsghdr(struct msghdr *msg,
				     int level,
				     int type,
				     const void *data,
				     size_t len)
{
	const size_t full_len = CMSG_LEN(len);
	const size_t full_space = CMSG_SPACE(len);
	uint8_t scratch[full_space];
	void *scratch_ptr = (void *)scratch;
	struct cmsghdr *hdr = (struct cmsghdr *)scratch_ptr;
	uint8_t *dst = (uint8_t *)msg->msg_control;
	size_t copy_len = full_len;
	size_t consumed = full_space;

	/* Clamp to what is left in the caller's control buffer. */
	if (msg->msg_controllen < full_len) {
		copy_len = msg->msg_controllen;
		msg->msg_flags |= MSG_CTRUNC;
	}
	if (msg->msg_controllen < full_space) {
		consumed = msg->msg_controllen;
	}

	/*
	 * Build the complete element, including all of the input data, in
	 * the scratch buffer first; that makes truncation a plain prefix
	 * copy.
	 */
	memset(scratch, 0, full_space);
	hdr->cmsg_len = copy_len;
	hdr->cmsg_level = level;
	hdr->cmsg_type = type;
	memcpy(CMSG_DATA(hdr), data, len);

	/*
	 * Copy the possibly truncated element: copy_len bytes are written
	 * but 'consumed' bytes are skipped, so trailing padding in the
	 * destination is left untouched.
	 */
	memcpy(dst, hdr, copy_len);
	msg->msg_control = dst + consumed;
	msg->msg_controllen -= consumed;
}
4976
4977 static int swrap_msghdr_add_pktinfo(struct socket_info *si,
4978                                     struct msghdr *msg)
4979 {
4980         /* Add packet info */
4981         switch (si->pktinfo) {
4982 #if defined(IP_PKTINFO) && (defined(HAVE_STRUCT_IN_PKTINFO) || defined(IP_RECVDSTADDR))
4983         case AF_INET: {
4984                 struct sockaddr_in *sin;
4985 #if defined(HAVE_STRUCT_IN_PKTINFO)
4986                 struct in_pktinfo pkt;
4987 #elif defined(IP_RECVDSTADDR)
4988                 struct in_addr pkt;
4989 #endif
4990
4991                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in)) {
4992                         sin = &si->bindname.sa.in;
4993                 } else {
4994                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in)) {
4995                                 return 0;
4996                         }
4997                         sin = &si->myname.sa.in;
4998                 }
4999
5000                 ZERO_STRUCT(pkt);
5001
5002 #if defined(HAVE_STRUCT_IN_PKTINFO)
5003                 pkt.ipi_ifindex = socket_wrapper_default_iface();
5004                 pkt.ipi_addr.s_addr = sin->sin_addr.s_addr;
5005 #elif defined(IP_RECVDSTADDR)
5006                 pkt = sin->sin_addr;
5007 #endif
5008
5009                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IP, IP_PKTINFO,
5010                                          &pkt, sizeof(pkt));
5011
5012                 break;
5013         }
5014 #endif /* IP_PKTINFO */
5015 #if defined(HAVE_IPV6)
5016         case AF_INET6: {
5017 #if defined(IPV6_PKTINFO) && defined(HAVE_STRUCT_IN6_PKTINFO)
5018                 struct sockaddr_in6 *sin6;
5019                 struct in6_pktinfo pkt6;
5020
5021                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in6)) {
5022                         sin6 = &si->bindname.sa.in6;
5023                 } else {
5024                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in6)) {
5025                                 return 0;
5026                         }
5027                         sin6 = &si->myname.sa.in6;
5028                 }
5029
5030                 ZERO_STRUCT(pkt6);
5031
5032                 pkt6.ipi6_ifindex = socket_wrapper_default_iface();
5033                 pkt6.ipi6_addr = sin6->sin6_addr;
5034
5035                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IPV6, IPV6_PKTINFO,
5036                                         &pkt6, sizeof(pkt6));
5037 #endif /* HAVE_STRUCT_IN6_PKTINFO */
5038
5039                 break;
5040         }
5041 #endif /* IPV6_PKTINFO */
5042         default:
5043                 return -1;
5044         }
5045
5046         return 0;
5047 }
5048
5049 static int swrap_msghdr_add_socket_info(struct socket_info *si,
5050                                         struct msghdr *omsg)
5051 {
5052         int rc = 0;
5053
5054         if (si->pktinfo > 0) {
5055                 rc = swrap_msghdr_add_pktinfo(si, omsg);
5056         }
5057
5058         return rc;
5059 }
5060
5061 static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
5062                                    uint8_t **cm_data,
5063                                    size_t *cm_data_space);
5064 static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
5065                                                 uint8_t **cm_data,
5066                                                 size_t *cm_data_space);
5067 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5068                                                 uint8_t **cm_data,
5069                                                 size_t *cm_data_space);
5070
5071 static int swrap_sendmsg_filter_cmsghdr(const struct msghdr *_msg,
5072                                         uint8_t **cm_data,
5073                                         size_t *cm_data_space)
5074 {
5075         struct msghdr *msg = discard_const_p(struct msghdr, _msg);
5076         struct cmsghdr *cmsg;
5077         int rc = -1;
5078
5079         /* Nothing to do */
5080         if (msg->msg_controllen == 0 || msg->msg_control == NULL) {
5081                 return 0;
5082         }
5083
5084         for (cmsg = CMSG_FIRSTHDR(msg);
5085              cmsg != NULL;
5086              cmsg = CMSG_NXTHDR(msg, cmsg)) {
5087                 switch (cmsg->cmsg_level) {
5088                 case IPPROTO_IP:
5089                         rc = swrap_sendmsg_filter_cmsg_ipproto_ip(cmsg,
5090                                                                   cm_data,
5091                                                                   cm_data_space);
5092                         break;
5093                 case SOL_SOCKET:
5094                         rc = swrap_sendmsg_filter_cmsg_sol_socket(cmsg,
5095                                                                   cm_data,
5096                                                                   cm_data_space);
5097                         break;
5098                 default:
5099                         rc = swrap_sendmsg_copy_cmsg(cmsg,
5100                                                      cm_data,
5101                                                      cm_data_space);
5102                         break;
5103                 }
5104                 if (rc < 0) {
5105                         int saved_errno = errno;
5106                         SAFE_FREE(*cm_data);
5107                         *cm_data_space = 0;
5108                         errno = saved_errno;
5109                         return rc;
5110                 }
5111         }
5112
5113         return rc;
5114 }
5115
/*
 * Append a copy of one control message to the buffer in *cm_data.
 *
 * The buffer is grown with realloc() by CMSG_ALIGN(cmsg->cmsg_len) bytes
 * and *cm_data_space is updated to the new total size. The first
 * cmsg->cmsg_len bytes of the new region are copied from cmsg. The
 * alignment padding behind them is zeroed, so that no uninitialised heap
 * bytes end up in a control buffer that is later passed to the kernel.
 *
 * @param[in]     cmsg           The control message to copy.
 * @param[in,out] cm_data        The buffer to append to (may be NULL).
 * @param[in,out] cm_data_space  The current size of *cm_data in bytes.
 *
 * @return 0 on success; -1 if realloc() fails, in which case *cm_data
 *         and *cm_data_space are left untouched.
 */
static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
				   uint8_t **cm_data,
				   size_t *cm_data_space)
{
	size_t msg_len = cmsg->cmsg_len;
	size_t aligned_len = CMSG_ALIGN(msg_len);
	size_t cmspace = *cm_data_space + aligned_len;
	uint8_t *p;

	p = realloc((*cm_data), cmspace);
	if (p == NULL) {
		return -1;
	}
	(*cm_data) = p;

	p = (*cm_data) + (*cm_data_space);
	*cm_data_space = cmspace;

	memcpy(p, cmsg, msg_len);
	if (aligned_len > msg_len) {
		/* Clear the padding that realloc() left uninitialised. */
		memset(p + msg_len, 0, aligned_len - msg_len);
	}

	return 0;
}
5138
static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
					     uint8_t **cm_data,
					     size_t *cm_data_space);

/*
 * Filter one control message of level IPPROTO_IP.
 *
 * IP_PKTINFO and IPV6_PKTINFO (where defined) are passed on to
 * swrap_sendmsg_filter_cmsg_pktinfo() and its result is returned.
 * Any other message type yields -1.
 */
static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
						uint8_t **cm_data,
						size_t *cm_data_space)
{
	switch (cmsg->cmsg_type) {
#ifdef IP_PKTINFO
	case IP_PKTINFO:
		return swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
							 cm_data,
							 cm_data_space);
#endif
#ifdef IPV6_PKTINFO
	case IPV6_PKTINFO:
		return swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
							 cm_data,
							 cm_data_space);
#endif
	default:
		break;
	}

	return -1;
}
5171
/*
 * Filter a packet-info control message on send.
 *
 * Passing a IP pktinfo to a unix socket might be rejected by the
 * Kernel, at least on FreeBSD. The message is therefore skipped: nothing
 * is appended to *cm_data and success (0) is reported.
 */
static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
					     uint8_t **cm_data,
					     size_t *cm_data_space)
{
	/* None of the arguments is needed to drop the message. */
	(void)cm_data_space;
	(void)cm_data;
	(void)cmsg;

	return 0;
}
5186
5187 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5188                                                 uint8_t **cm_data,
5189                                                 size_t *cm_data_space)
5190 {
5191         int rc = -1;
5192
5193         switch (cmsg->cmsg_type) {
5194         case SCM_RIGHTS:
5195                 SWRAP_LOG(SWRAP_LOG_TRACE,
5196                           "Ignoring SCM_RIGHTS on inet socket!");
5197                 rc = 0;
5198                 break;
5199 #ifdef SCM_CREDENTIALS
5200         case SCM_CREDENTIALS:
5201                 SWRAP_LOG(SWRAP_LOG_TRACE,
5202                           "Ignoring SCM_CREDENTIALS on inet socket!");
5203                 rc = 0;
5204                 break;
5205 #endif /* SCM_CREDENTIALS */
5206         default:
5207                 rc = swrap_sendmsg_copy_cmsg(cmsg,
5208                                              cm_data,
5209                                              cm_data_space);
5210                 break;
5211         }
5212
5213         return rc;
5214 }
5215
5216 static const uint64_t swrap_unix_scm_right_magic = 0x8e0e13f27c42fc36;
5217
5218 /*
5219  * We only allow up to 6 fds at a time
5220  * as that's more than enough for Samba
5221  * and it means we can keep the logic simple
5222  * and work with fixed size arrays.
5223  *
5224  * We also keep sizeof(struct swrap_unix_scm_rights)
5225  * under PIPE_BUF (4096) in order to allow a non-blocking
5226  * write into the pipe.
5227  */
5228 #ifndef PIPE_BUF
5229 #define PIPE_BUF 4096
5230 #endif
5231 #define SWRAP_MAX_PASSED_FDS ((size_t)6)
5232 #define SWRAP_MAX_PASSED_SOCKET_INFO SWRAP_MAX_PASSED_FDS
5233 struct swrap_unix_scm_rights_payload {
5234         uint8_t num_idxs;
5235         int8_t idxs[SWRAP_MAX_PASSED_FDS];
5236         struct socket_info infos[SWRAP_MAX_PASSED_SOCKET_INFO];
5237 };
5238 struct swrap_unix_scm_rights {
5239         uint64_t magic;
5240         char package_name[sizeof(SOCKET_WRAPPER_PACKAGE)];
5241         char package_version[sizeof(SOCKET_WRAPPER_VERSION)];
5242         uint32_t full_size;
5243         uint32_t payload_size;
5244         struct swrap_unix_scm_rights_payload payload;
5245 };
5246
5247 static void swrap_dec_fd_passed_array(size_t num, struct socket_info **array)
5248 {
5249         int saved_errno = errno;
5250         size_t i;
5251
5252         for (i = 0; i < num; i++) {
5253                 struct socket_info *si = array[i];
5254                 if (si == NULL) {
5255                         continue;
5256                 }
5257
5258                 SWRAP_LOCK_SI(si);
5259                 swrap_dec_refcount(si);
5260                 if (si->fd_passed > 0) {
5261                         si->fd_passed -= 1;
5262                 }
5263                 SWRAP_UNLOCK_SI(si);
5264                 array[i] = NULL;
5265         }
5266
5267         errno = saved_errno;
5268 }
5269
5270 static void swrap_undo_si_idx_array(size_t num, int *array)
5271 {
5272         int saved_errno = errno;
5273         size_t i;
5274
5275         swrap_mutex_lock(&first_free_mutex);
5276
5277         for (i = 0; i < num; i++) {
5278                 struct socket_info *si = NULL;
5279
5280                 if (array[i] == -1) {
5281                         continue;
5282                 }
5283
5284                 si = swrap_get_socket_info(array[i]);
5285                 if (si == NULL) {
5286                         continue;
5287                 }
5288
5289                 SWRAP_LOCK_SI(si);
5290                 swrap_dec_refcount(si);
5291                 SWRAP_UNLOCK_SI(si);
5292
5293                 swrap_set_next_free(si, first_free);
5294                 first_free = array[i];
5295                 array[i] = -1;
5296         }
5297
5298         swrap_mutex_unlock(&first_free_mutex);
5299         errno = saved_errno;
5300 }
5301
/*
 * Close every descriptor in the array with libc_close(), skipping the
 * slots that hold -1. The array itself is not modified and errno is
 * preserved.
 */
static void swrap_close_fd_array(size_t num, const int *array)
{
	const int saved_errno = errno;
	size_t idx;

	for (idx = 0; idx < num; idx++) {
		int fd = array[idx];

		if (fd != -1) {
			libc_close(fd);
		}
	}

	errno = saved_errno;
}
5316
/*
 * Helper union to view a byte pointer (e.g. the result of CMSG_DATA())
 * as a pointer to an array of file descriptors: store the address in 'p'
 * and read it back through 'fds'.
 * NOTE(review): presumably used instead of a direct pointer cast to avoid
 * const/alignment cast warnings - confirm.
 */
union __swrap_fds {
	const uint8_t *p;
	int *fds;
};

/*
 * Helper union to view a byte pointer into a control buffer as a
 * struct cmsghdr pointer: store the address in 'p' and read it back
 * through 'cmsg'.
 */
union __swrap_cmsghdr {
	const uint8_t *p;
	struct cmsghdr *cmsg;
};
5326
5327 static int swrap_sendmsg_unix_scm_rights(const struct cmsghdr *cmsg,
5328                                          uint8_t **cm_data,
5329                                          size_t *cm_data_space,
5330                                          int *scm_rights_pipe_fd)
5331 {
5332         struct swrap_unix_scm_rights info;
5333         struct swrap_unix_scm_rights_payload *payload = NULL;
5334         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5335         struct socket_info *si_array[SWRAP_MAX_PASSED_FDS] = { NULL, };
5336         size_t info_idx = 0;
5337         size_t size_fds_in;
5338         size_t num_fds_in;
5339         union __swrap_fds __fds_in = { .p = NULL, };
5340         const int *fds_in = NULL;
5341         size_t num_fds_out;
5342         size_t size_fds_out;
5343         union __swrap_fds __fds_out = { .p = NULL, };
5344         int *fds_out = NULL;
5345         size_t cmsg_len;
5346         size_t cmsg_space;
5347         size_t new_cm_data_space;
5348         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5349         struct cmsghdr *new_cmsg = NULL;
5350         uint8_t *p = NULL;
5351         size_t i;
5352         int pipefd[2] = { -1, -1 };
5353         int rc;
5354         ssize_t sret;
5355
5356         /*
5357          * We pass this a buffer to the kernel make sure any padding
5358          * is also cleared.
5359          */
5360         ZERO_STRUCT(info);
5361         info.magic = swrap_unix_scm_right_magic;
5362         memcpy(info.package_name,
5363                SOCKET_WRAPPER_PACKAGE,
5364                sizeof(info.package_name));
5365         memcpy(info.package_version,
5366                SOCKET_WRAPPER_VERSION,
5367                sizeof(info.package_version));
5368         info.full_size = sizeof(info);
5369         info.payload_size = sizeof(info.payload);
5370         payload = &info.payload;
5371
5372         if (*scm_rights_pipe_fd != -1) {
5373                 SWRAP_LOG(SWRAP_LOG_ERROR,
5374                           "Two SCM_RIGHTS headers are not supported by socket_wrapper");
5375                 errno = EINVAL;
5376                 return -1;
5377         }
5378
5379         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5380                 SWRAP_LOG(SWRAP_LOG_ERROR,
5381                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5382                           (size_t)cmsg->cmsg_len,
5383                           CMSG_LEN(0));
5384                 errno = EINVAL;
5385                 return -1;
5386         }
5387         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5388         if ((size_fds_in % sizeof(int)) != 0) {
5389                 SWRAP_LOG(SWRAP_LOG_ERROR,
5390                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5391                           (size_t)cmsg->cmsg_len,
5392                           size_fds_in,
5393                           sizeof(int));
5394                 errno = EINVAL;
5395                 return -1;
5396         }
5397         num_fds_in = size_fds_in / sizeof(int);
5398         if (num_fds_in > SWRAP_MAX_PASSED_FDS) {
5399                 SWRAP_LOG(SWRAP_LOG_ERROR,
5400                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5401                           "num_fds_in=%zu > "
5402                           "SWRAP_MAX_PASSED_FDS(%zu)",
5403                           (size_t)cmsg->cmsg_len,
5404                           size_fds_in,
5405                           num_fds_in,
5406                           SWRAP_MAX_PASSED_FDS);
5407                 errno = EINVAL;
5408                 return -1;
5409         }
5410         if (num_fds_in == 0) {
5411                 SWRAP_LOG(SWRAP_LOG_ERROR,
5412                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5413                           "num_fds_in=%zu",
5414                           (size_t)cmsg->cmsg_len,
5415                           size_fds_in,
5416                           num_fds_in);
5417                 errno = EINVAL;
5418                 return -1;
5419         }
5420         __fds_in.p = CMSG_DATA(cmsg);
5421         fds_in = __fds_in.fds;
5422         num_fds_out = num_fds_in + 1;
5423
5424         SWRAP_LOG(SWRAP_LOG_TRACE,
5425                   "num_fds_in=%zu num_fds_out=%zu",
5426                   num_fds_in, num_fds_out);
5427
5428         size_fds_out = sizeof(int) * num_fds_out;
5429         cmsg_len = CMSG_LEN(size_fds_out);
5430         cmsg_space = CMSG_SPACE(size_fds_out);
5431
5432         new_cm_data_space = *cm_data_space + cmsg_space;
5433
5434         p = realloc((*cm_data), new_cm_data_space);
5435         if (p == NULL) {
5436                 return -1;
5437         }
5438         (*cm_data) = p;
5439         p = (*cm_data) + (*cm_data_space);
5440         memset(p, 0, cmsg_space);
5441         __new_cmsg.p = p;
5442         new_cmsg = __new_cmsg.cmsg;
5443         *new_cmsg = *cmsg;
5444         __fds_out.p = CMSG_DATA(new_cmsg);
5445         fds_out = __fds_out.fds;
5446         memcpy(fds_out, fds_in, size_fds_in);
5447         new_cmsg->cmsg_len = cmsg->cmsg_len;
5448
5449         for (i = 0; i < num_fds_in; i++) {
5450                 size_t j;
5451
5452                 payload->idxs[i] = -1;
5453                 payload->num_idxs++;
5454
5455                 si_idx_array[i] = find_socket_info_index(fds_in[i]);
5456                 if (si_idx_array[i] == -1) {
5457                         continue;
5458                 }
5459
5460                 si_array[i] = swrap_get_socket_info(si_idx_array[i]);
5461                 if (si_array[i] == NULL) {
5462                         SWRAP_LOG(SWRAP_LOG_ERROR,
5463                                   "fds_in[%zu]=%d si_idx_array[%zu]=%d missing!",
5464                                   i, fds_in[i], i, si_idx_array[i]);
5465                         errno = EINVAL;
5466                         return -1;
5467                 }
5468
5469                 for (j = 0; j < i; j++) {
5470                         if (si_array[j] == si_array[i]) {
5471                                 payload->idxs[i] = payload->idxs[j];
5472                                 break;
5473                         }
5474                 }
5475                 if (payload->idxs[i] == -1) {
5476                         if (info_idx >= SWRAP_MAX_PASSED_SOCKET_INFO) {
5477                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5478                                           "fds_in[%zu]=%d,si_idx_array[%zu]=%d: "
5479                                           "info_idx=%zu >= SWRAP_MAX_PASSED_FDS(%zu)!",
5480                                           i, fds_in[i], i, si_idx_array[i],
5481                                           info_idx,
5482                                           SWRAP_MAX_PASSED_SOCKET_INFO);
5483                                 errno = EINVAL;
5484                                 return -1;
5485                         }
5486                         payload->idxs[i] = info_idx;
5487                         info_idx += 1;
5488                         continue;
5489                 }
5490         }
5491
5492         for (i = 0; i < num_fds_in; i++) {
5493                 struct socket_info *si = si_array[i];
5494
5495                 if (si == NULL) {
5496                         SWRAP_LOG(SWRAP_LOG_TRACE,
5497                                   "fds_in[%zu]=%d not an inet socket",
5498                                   i, fds_in[i]);
5499                         continue;
5500                 }
5501
5502                 SWRAP_LOG(SWRAP_LOG_TRACE,
5503                           "fds_in[%zu]=%d si_idx_array[%zu]=%d "
5504                           "passing as info.idxs[%zu]=%d!",
5505                           i, fds_in[i],
5506                           i, si_idx_array[i],
5507                           i, payload->idxs[i]);
5508
5509                 SWRAP_LOCK_SI(si);
5510                 si->fd_passed += 1;
5511                 payload->infos[payload->idxs[i]] = *si;
5512                 payload->infos[payload->idxs[i]].fd_passed = 0;
5513                 SWRAP_UNLOCK_SI(si);
5514         }
5515
5516         rc = pipe(pipefd);
5517         if (rc == -1) {
5518                 int saved_errno = errno;
5519                 SWRAP_LOG(SWRAP_LOG_ERROR,
5520                           "pipe() failed - %d %s",
5521                           saved_errno,
5522                           strerror(saved_errno));
5523                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5524                 errno = saved_errno;
5525                 return -1;
5526         }
5527
5528         sret = libc_write(pipefd[1], &info, sizeof(info));
5529         if (sret != sizeof(info)) {
5530                 int saved_errno = errno;
5531                 if (sret != -1) {
5532                         saved_errno = EINVAL;
5533                 }
5534                 SWRAP_LOG(SWRAP_LOG_ERROR,
5535                           "write() failed - sret=%zd - %d %s",
5536                           sret, saved_errno,
5537                           strerror(saved_errno));
5538                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5539                 libc_close(pipefd[1]);
5540                 libc_close(pipefd[0]);
5541                 errno = saved_errno;
5542                 return -1;
5543         }
5544         libc_close(pipefd[1]);
5545
5546         /*
5547          * Add the pipe read end to the end of the passed fd array
5548          */
5549         fds_out[num_fds_in] = pipefd[0];
5550         new_cmsg->cmsg_len = cmsg_len;
5551
5552         /* we're done ... */
5553         *scm_rights_pipe_fd = pipefd[0];
5554         *cm_data_space = new_cm_data_space;
5555
5556         return 0;
5557 }
5558
/*
 * Handle one SOL_SOCKET control message on the sendmsg() path.
 *
 * SCM_RIGHTS messages go to swrap_sendmsg_unix_scm_rights(), which also
 * receives scm_rights_pipe_fd; every other message type goes to
 * swrap_sendmsg_copy_cmsg().
 *
 * Returns whatever the selected helper returned.
 */
static int swrap_sendmsg_unix_sol_socket(const struct cmsghdr *cmsg,
					 uint8_t **cm_data,
					 size_t *cm_data_space,
					 int *scm_rights_pipe_fd)
{
	if (cmsg->cmsg_type == SCM_RIGHTS) {
		return swrap_sendmsg_unix_scm_rights(cmsg,
						     cm_data,
						     cm_data_space,
						     scm_rights_pipe_fd);
	}

	return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5582
5583 static int swrap_recvmsg_unix_scm_rights(const struct cmsghdr *cmsg,
5584                                          uint8_t **cm_data,
5585                                          size_t *cm_data_space)
5586 {
5587         int scm_rights_pipe_fd = -1;
5588         struct swrap_unix_scm_rights info;
5589         struct swrap_unix_scm_rights_payload *payload = NULL;
5590         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5591         size_t size_fds_in;
5592         size_t num_fds_in;
5593         union __swrap_fds __fds_in = { .p = NULL, };
5594         const int *fds_in = NULL;
5595         size_t num_fds_out;
5596         size_t size_fds_out;
5597         union __swrap_fds __fds_out = { .p = NULL, };
5598         int *fds_out = NULL;
5599         size_t cmsg_len;
5600         size_t cmsg_space;
5601         size_t new_cm_data_space;
5602         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5603         struct cmsghdr *new_cmsg = NULL;
5604         uint8_t *p = NULL;
5605         ssize_t sret;
5606         size_t i;
5607         int cmp;
5608
5609         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5610                 SWRAP_LOG(SWRAP_LOG_ERROR,
5611                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5612                           (size_t)cmsg->cmsg_len,
5613                           CMSG_LEN(0));
5614                 errno = EINVAL;
5615                 return -1;
5616         }
5617         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5618         if ((size_fds_in % sizeof(int)) != 0) {
5619                 SWRAP_LOG(SWRAP_LOG_ERROR,
5620                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5621                           (size_t)cmsg->cmsg_len,
5622                           size_fds_in,
5623                           sizeof(int));
5624                 errno = EINVAL;
5625                 return -1;
5626         }
5627         num_fds_in = size_fds_in / sizeof(int);
5628         if (num_fds_in > (SWRAP_MAX_PASSED_FDS + 1)) {
5629                 SWRAP_LOG(SWRAP_LOG_ERROR,
5630                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5631                           "num_fds_in=%zu > SWRAP_MAX_PASSED_FDS+1(%zu)",
5632                           (size_t)cmsg->cmsg_len,
5633                           size_fds_in,
5634                           num_fds_in,
5635                           SWRAP_MAX_PASSED_FDS+1);
5636                 errno = EINVAL;
5637                 return -1;
5638         }
5639         if (num_fds_in <= 1) {
5640                 SWRAP_LOG(SWRAP_LOG_ERROR,
5641                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5642                           "num_fds_in=%zu",
5643                           (size_t)cmsg->cmsg_len,
5644                           size_fds_in,
5645                           num_fds_in);
5646                 errno = EINVAL;
5647                 return -1;
5648         }
5649         __fds_in.p = CMSG_DATA(cmsg);
5650         fds_in = __fds_in.fds;
5651         num_fds_out = num_fds_in - 1;
5652
5653         SWRAP_LOG(SWRAP_LOG_TRACE,
5654                   "num_fds_in=%zu num_fds_out=%zu",
5655                   num_fds_in, num_fds_out);
5656
5657         for (i = 0; i < num_fds_in; i++) {
5658                 /* Check if we have a stale fd and remove it */
5659                 swrap_remove_stale(fds_in[i]);
5660         }
5661
5662         scm_rights_pipe_fd = fds_in[num_fds_out];
5663         size_fds_out = sizeof(int) * num_fds_out;
5664         cmsg_len = CMSG_LEN(size_fds_out);
5665         cmsg_space = CMSG_SPACE(size_fds_out);
5666
5667         new_cm_data_space = *cm_data_space + cmsg_space;
5668
5669         p = realloc((*cm_data), new_cm_data_space);
5670         if (p == NULL) {
5671                 swrap_close_fd_array(num_fds_in, fds_in);
5672                 return -1;
5673         }
5674         (*cm_data) = p;
5675         p = (*cm_data) + (*cm_data_space);
5676         memset(p, 0, cmsg_space);
5677         __new_cmsg.p = p;
5678         new_cmsg = __new_cmsg.cmsg;
5679         *new_cmsg = *cmsg;
5680         __fds_out.p = CMSG_DATA(new_cmsg);
5681         fds_out = __fds_out.fds;
5682         memcpy(fds_out, fds_in, size_fds_out);
5683         new_cmsg->cmsg_len = cmsg_len;
5684
5685         sret = read(scm_rights_pipe_fd, &info, sizeof(info));
5686         if (sret != sizeof(info)) {
5687                 int saved_errno = errno;
5688                 if (sret != -1) {
5689                         saved_errno = EINVAL;
5690                 }
5691                 SWRAP_LOG(SWRAP_LOG_ERROR,
5692                           "read() failed - sret=%zd - %d %s",
5693                           sret, saved_errno,
5694                           strerror(saved_errno));
5695                 swrap_close_fd_array(num_fds_in, fds_in);
5696                 errno = saved_errno;
5697                 return -1;
5698         }
5699         libc_close(scm_rights_pipe_fd);
5700         payload = &info.payload;
5701
5702         if (info.magic != swrap_unix_scm_right_magic) {
5703                 SWRAP_LOG(SWRAP_LOG_ERROR,
5704                           "info.magic=0x%llx != swrap_unix_scm_right_magic=0x%llx",
5705                           (unsigned long long)info.magic,
5706                           (unsigned long long)swrap_unix_scm_right_magic);
5707                 swrap_close_fd_array(num_fds_out, fds_out);
5708                 errno = EINVAL;
5709                 return -1;
5710         }
5711
5712         cmp = memcmp(info.package_name,
5713                      SOCKET_WRAPPER_PACKAGE,
5714                      sizeof(info.package_name));
5715         if (cmp != 0) {
5716                 SWRAP_LOG(SWRAP_LOG_ERROR,
5717                           "info.package_name='%.*s' != '%s'",
5718                           (int)sizeof(info.package_name),
5719                           info.package_name,
5720                           SOCKET_WRAPPER_PACKAGE);
5721                 swrap_close_fd_array(num_fds_out, fds_out);
5722                 errno = EINVAL;
5723                 return -1;
5724         }
5725
5726         cmp = memcmp(info.package_version,
5727                      SOCKET_WRAPPER_VERSION,
5728                      sizeof(info.package_version));
5729         if (cmp != 0) {
5730                 SWRAP_LOG(SWRAP_LOG_ERROR,
5731                           "info.package_version='%.*s' != '%s'",
5732                           (int)sizeof(info.package_version),
5733                           info.package_version,
5734                           SOCKET_WRAPPER_VERSION);
5735                 swrap_close_fd_array(num_fds_out, fds_out);
5736                 errno = EINVAL;
5737                 return -1;
5738         }
5739
5740         if (info.full_size != sizeof(info)) {
5741                 SWRAP_LOG(SWRAP_LOG_ERROR,
5742                           "info.full_size=%zu != sizeof(info)=%zu",
5743                           (size_t)info.full_size,
5744                           sizeof(info));
5745                 swrap_close_fd_array(num_fds_out, fds_out);
5746                 errno = EINVAL;
5747                 return -1;
5748         }
5749
5750         if (info.payload_size != sizeof(info.payload)) {
5751                 SWRAP_LOG(SWRAP_LOG_ERROR,
5752                           "info.payload_size=%zu != sizeof(info.payload)=%zu",
5753                           (size_t)info.payload_size,
5754                           sizeof(info.payload));
5755                 swrap_close_fd_array(num_fds_out, fds_out);
5756                 errno = EINVAL;
5757                 return -1;
5758         }
5759
5760         if (payload->num_idxs != num_fds_out) {
5761                 SWRAP_LOG(SWRAP_LOG_ERROR,
5762                           "info.num_idxs=%u != num_fds_out=%zu",
5763                           payload->num_idxs, num_fds_out);
5764                 swrap_close_fd_array(num_fds_out, fds_out);
5765                 errno = EINVAL;
5766                 return -1;
5767         }
5768
5769         for (i = 0; i < num_fds_out; i++) {
5770                 size_t j;
5771
5772                 si_idx_array[i] = -1;
5773
5774                 if (payload->idxs[i] == -1) {
5775                         SWRAP_LOG(SWRAP_LOG_TRACE,
5776                                   "fds_out[%zu]=%d not an inet socket",
5777                                   i, fds_out[i]);
5778                         continue;
5779                 }
5780
5781                 if (payload->idxs[i] < 0) {
5782                         SWRAP_LOG(SWRAP_LOG_ERROR,
5783                                   "fds_out[%zu]=%d info.idxs[%zu]=%d < 0!",
5784                                   i, fds_out[i], i, payload->idxs[i]);
5785                         swrap_close_fd_array(num_fds_out, fds_out);
5786                         errno = EINVAL;
5787                         return -1;
5788                 }
5789
5790                 if (payload->idxs[i] >= payload->num_idxs) {
5791                         SWRAP_LOG(SWRAP_LOG_ERROR,
5792                                   "fds_out[%zu]=%d info.idxs[%zu]=%d >= %u!",
5793                                   i, fds_out[i], i, payload->idxs[i],
5794                                   payload->num_idxs);
5795                         swrap_close_fd_array(num_fds_out, fds_out);
5796                         errno = EINVAL;
5797                         return -1;
5798                 }
5799
5800                 if ((size_t)fds_out[i] >= socket_fds_max) {
5801                         SWRAP_LOG(SWRAP_LOG_ERROR,
5802                                   "The max socket index limit of %zu has been reached, "
5803                                   "trying to add %d",
5804                                   socket_fds_max,
5805                                   fds_out[i]);
5806                         swrap_close_fd_array(num_fds_out, fds_out);
5807                         errno = EMFILE;
5808                         return -1;
5809                 }
5810
5811                 SWRAP_LOG(SWRAP_LOG_TRACE,
5812                           "fds_in[%zu]=%d "
5813                           "received as info.idxs[%zu]=%d!",
5814                           i, fds_out[i],
5815                           i, payload->idxs[i]);
5816
5817                 for (j = 0; j < i; j++) {
5818                         if (payload->idxs[j] == -1) {
5819                                 continue;
5820                         }
5821                         if (payload->idxs[j] == payload->idxs[i]) {
5822                                 si_idx_array[i] = si_idx_array[j];
5823                         }
5824                 }
5825                 if (si_idx_array[i] == -1) {
5826                         const struct socket_info *si = &payload->infos[payload->idxs[i]];
5827
5828                         si_idx_array[i] = swrap_add_socket_info(si);
5829                         if (si_idx_array[i] == -1) {
5830                                 int saved_errno = errno;
5831                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5832                                           "The max socket index limit of %zu has been reached, "
5833                                           "trying to add %d",
5834                                           socket_fds_max,
5835                                           fds_out[i]);
5836                                 swrap_undo_si_idx_array(i, si_idx_array);
5837                                 swrap_close_fd_array(num_fds_out, fds_out);
5838                                 errno = saved_errno;
5839                                 return -1;
5840                         }
5841                         SWRAP_LOG(SWRAP_LOG_TRACE,
5842                                   "Imported %s socket for protocol %s, fd=%d",
5843                                   si->family == AF_INET ? "IPv4" : "IPv6",
5844                                   si->type == SOCK_DGRAM ? "UDP" : "TCP",
5845                                   fds_out[i]);
5846                 }
5847         }
5848
5849         for (i = 0; i < num_fds_out; i++) {
5850                 if (si_idx_array[i] == -1) {
5851                         continue;
5852                 }
5853                 set_socket_info_index(fds_out[i], si_idx_array[i]);
5854         }
5855
5856         /* we're done ... */
5857         *cm_data_space = new_cm_data_space;
5858
5859         return 0;
5860 }
5861
/*
 * Handle one SOL_SOCKET control message on the recvmsg() path.
 *
 * SCM_RIGHTS messages go to swrap_recvmsg_unix_scm_rights(); every other
 * message type goes to swrap_sendmsg_copy_cmsg().
 *
 * Returns whatever the selected helper returned.
 */
static int swrap_recvmsg_unix_sol_socket(const struct cmsghdr *cmsg,
					 uint8_t **cm_data,
					 size_t *cm_data_space)
{
	if (cmsg->cmsg_type == SCM_RIGHTS) {
		return swrap_recvmsg_unix_scm_rights(cmsg,
						     cm_data,
						     cm_data_space);
	}

	return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5883
5884 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
5885
/*
 * Prepare a sendmsg() call on a socket that is not wrapped.
 *
 * *msg_tmp is initialised as a copy of *_msg_in and *scm_rights_pipe_fd is
 * set to -1. If the message carries control data, each cmsg is rebuilt
 * into a freshly allocated buffer: SOL_SOCKET messages are processed by
 * swrap_sendmsg_unix_sol_socket(), everything else is copied by
 * swrap_sendmsg_copy_cmsg(). On success msg_tmp->msg_control and
 * msg_tmp->msg_controllen refer to the new buffer.
 *
 * Returns 0 on success. On failure the helper's negative return value is
 * passed on with errno preserved; the temporary control buffer is freed
 * and a pipe fd already stored in *scm_rights_pipe_fd is closed and reset
 * to -1.
 */
static int swrap_sendmsg_before_unix(const struct msghdr *_msg_in,
				     struct msghdr *msg_tmp,
				     int *scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
	struct msghdr *msg_in = discard_const_p(struct msghdr, _msg_in);
	struct cmsghdr *cmsg = NULL;
	uint8_t *cm_data = NULL;
	size_t cm_data_space = 0;
	int rc = -1;

	*msg_tmp = *msg_in;
	*scm_rights_pipe_fd = -1;

	/* Nothing to do */
	if (msg_in->msg_controllen == 0 || msg_in->msg_control == NULL) {
		return 0;
	}

	for (cmsg = CMSG_FIRSTHDR(msg_in);
	     cmsg != NULL;
	     cmsg = CMSG_NXTHDR(msg_in, cmsg)) {
		switch (cmsg->cmsg_level) {
		case SOL_SOCKET:
			rc = swrap_sendmsg_unix_sol_socket(cmsg,
							   &cm_data,
							   &cm_data_space,
							   scm_rights_pipe_fd);
			break;

		default:
			rc = swrap_sendmsg_copy_cmsg(cmsg,
						     &cm_data,
						     &cm_data_space);
			break;
		}
		if (rc < 0) {
			int saved_errno = errno;
			SAFE_FREE(cm_data);
			/*
			 * An earlier SCM_RIGHTS cmsg may already have
			 * created the info pipe. The message is not going
			 * to be sent, so do not leak its read end.
			 *
			 * NOTE(review): fd_passed counters raised for that
			 * earlier cmsg are not rolled back here - confirm
			 * whether that needs handling.
			 */
			if (*scm_rights_pipe_fd != -1) {
				libc_close(*scm_rights_pipe_fd);
				*scm_rights_pipe_fd = -1;
			}
			errno = saved_errno;
			return rc;
		}
	}

	msg_tmp->msg_controllen = cm_data_space;
	msg_tmp->msg_control = cm_data;

	return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
	*msg_tmp = *_msg_in;
	return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
5939
/*
 * Clean up after a sendmsg() call prepared by swrap_sendmsg_before_unix().
 *
 * When control messages are supported, the pipe fd (if it is not -1) is
 * closed and msg_tmp->msg_control is released. errno is left exactly as
 * it was on entry.
 *
 * Returns ret unchanged.
 */
static ssize_t swrap_sendmsg_after_unix(struct msghdr *msg_tmp,
					ssize_t ret,
					int scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
	const int errno_on_entry = errno;

	if (scm_rights_pipe_fd != -1) {
		libc_close(scm_rights_pipe_fd);
	}
	SAFE_FREE(msg_tmp->msg_control);

	/* The cleanup must not disturb the errno of the send call. */
	errno = errno_on_entry;
#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
	return ret;
}
5954
/*
 * Prepare a recvmsg() call on a socket that is not wrapped.
 *
 * *msg_tmp starts out as a copy of *msg_in and *tmp_control as NULL.
 * If the caller supplied a control buffer, a zeroed temporary buffer is
 * allocated instead; it is CMSG_SPACE(sizeof(int)) bytes larger than the
 * caller's (unless the size would reach INT32_MAX), leaving room for one
 * additional fd. The temporary buffer is stored in msg_tmp and in
 * *tmp_control.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int swrap_recvmsg_before_unix(struct msghdr *msg_in,
				     struct msghdr *msg_tmp,
				     uint8_t **tmp_control)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
	const size_t pipe_fd_space = CMSG_SPACE(sizeof(int));
	uint8_t *ctrl_buf = NULL;
	size_t ctrl_len = 0;

	*tmp_control = NULL;
	*msg_tmp = *msg_in;

	SWRAP_LOG(SWRAP_LOG_TRACE,
		  "msg_in->msg_controllen=%zu",
		  (size_t)msg_in->msg_controllen);

	/* Without a control buffer there is nothing to prepare. */
	if (msg_in->msg_control == NULL || msg_in->msg_controllen == 0) {
		return 0;
	}

	/*
	 * Give the kernel a bit more space so that it can also deliver
	 * the pipe fd added by swrap_sendmsg_before_unix().
	 * swrap_recvmsg_after_unix() hides it again.
	 */
	ctrl_len = msg_in->msg_controllen;
	if ((INT32_MAX - pipe_fd_space) > ctrl_len) {
		ctrl_len += pipe_fd_space;
	}

	ctrl_buf = calloc(1, ctrl_len);
	if (ctrl_buf == NULL) {
		return -1;
	}

	*tmp_control = ctrl_buf;
	msg_tmp->msg_control = ctrl_buf;
	msg_tmp->msg_controllen = ctrl_len;

	SWRAP_LOG(SWRAP_LOG_TRACE,
		  "msg_tmp->msg_controllen=%zu",
		  (size_t)msg_tmp->msg_controllen);
	return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
	*msg_tmp = *msg_in;
	*tmp_control = NULL;
	return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6004
/*
 * Finish a recvmsg() call prepared by swrap_recvmsg_before_unix().
 *
 * msg_tmp      - the msghdr that was handed to the kernel
 * tmp_control  - the temporary control buffer allocated by
 *                swrap_recvmsg_before_unix(); always freed here
 * msg_out      - the caller's msghdr; not touched if ret < 0
 * ret          - the result of the receive call
 *
 * Each received cmsg is rebuilt into a scratch buffer (SOL_SOCKET
 * messages via swrap_recvmsg_unix_sol_socket(), everything else via
 * swrap_sendmsg_copy_cmsg()). The result is then copied into the caller's
 * own control buffer, truncated to the caller's msg_controllen.
 *
 * msg_out->msg_control always keeps pointing at the caller's buffer, it is
 * never replaced by the temporary one.
 *
 * Returns ret on success (errno preserved), or the helper's negative
 * return value if processing a cmsg failed.
 */
static ssize_t swrap_recvmsg_after_unix(struct msghdr *msg_tmp,
					uint8_t **tmp_control,
					struct msghdr *msg_out,
					ssize_t ret)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
	struct cmsghdr *cmsg = NULL;
	uint8_t *cm_data = NULL;
	size_t cm_data_space = 0;
	int rc = -1;

	if (ret < 0) {
		int saved_errno = errno;
		SWRAP_LOG(SWRAP_LOG_TRACE, "ret=%zd - %d - %s", ret,
			  saved_errno, strerror(saved_errno));
		SAFE_FREE(*tmp_control);
		/* msg_out should not be touched on error */
		errno = saved_errno;
		return ret;
	}

	SWRAP_LOG(SWRAP_LOG_TRACE,
		  "msg_tmp->msg_controllen=%zu",
		  (size_t)msg_tmp->msg_controllen);

	/* Nothing to do */
	if (msg_tmp->msg_controllen == 0 || msg_tmp->msg_control == NULL) {
		int saved_errno = errno;
		/*
		 * msg_tmp->msg_control may be the temporary buffer from
		 * swrap_recvmsg_before_unix(), which is freed below.
		 * Hand the caller's own pointer back instead of leaving
		 * a dangling one in msg_out.
		 */
		msg_tmp->msg_control = msg_out->msg_control;
		*msg_out = *msg_tmp;
		SAFE_FREE(*tmp_control);
		errno = saved_errno;
		return ret;
	}

	for (cmsg = CMSG_FIRSTHDR(msg_tmp);
	     cmsg != NULL;
	     cmsg = CMSG_NXTHDR(msg_tmp, cmsg)) {
		switch (cmsg->cmsg_level) {
		case SOL_SOCKET:
			rc = swrap_recvmsg_unix_sol_socket(cmsg,
							   &cm_data,
							   &cm_data_space);
			break;

		default:
			rc = swrap_sendmsg_copy_cmsg(cmsg,
						     &cm_data,
						     &cm_data_space);
			break;
		}
		if (rc < 0) {
			int saved_errno = errno;
			SAFE_FREE(cm_data);
			SAFE_FREE(*tmp_control);
			errno = saved_errno;
			return rc;
		}
	}

	/*
	 * msg_tmp->msg_control (*tmp_control) was created by
	 * swrap_recvmsg_before_unix() and msg_out->msg_control
	 * is still the buffer of the caller.
	 */
	msg_tmp->msg_control = msg_out->msg_control;
	msg_tmp->msg_controllen = msg_out->msg_controllen;
	*msg_out = *msg_tmp;

	cm_data_space = MIN(cm_data_space, msg_out->msg_controllen);
	if (cm_data_space > 0) {
		/* cm_data is NULL if no cmsg was produced; skip memcpy() */
		memcpy(msg_out->msg_control, cm_data, cm_data_space);
	}
	msg_out->msg_controllen = cm_data_space;
	SAFE_FREE(cm_data);
	SAFE_FREE(*tmp_control);

	SWRAP_LOG(SWRAP_LOG_TRACE,
		  "msg_out->msg_controllen=%zu",
		  (size_t)msg_out->msg_controllen);
	return ret;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
	int saved_errno = errno;
	*msg_out = *msg_tmp;
	SAFE_FREE(*tmp_control);
	errno = saved_errno;
	return ret;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6091
6092 static ssize_t swrap_sendmsg_before(int fd,
6093                                     struct socket_info *si,
6094                                     struct msghdr *msg,
6095                                     struct iovec *tmp_iov,
6096                                     struct sockaddr_un *tmp_un,
6097                                     const struct sockaddr_un **to_un,
6098                                     const struct sockaddr **to,
6099                                     int *bcast)
6100 {
6101         size_t i, len = 0;
6102         ssize_t ret = -1;
6103
6104         if (to_un) {
6105                 *to_un = NULL;
6106         }
6107         if (to) {
6108                 *to = NULL;
6109         }
6110         if (bcast) {
6111                 *bcast = 0;
6112         }
6113
6114         SWRAP_LOCK_SI(si);
6115
6116         switch (si->type) {
6117         case SOCK_STREAM: {
6118                 unsigned long mtu;
6119
6120                 if (!si->connected) {
6121                         errno = ENOTCONN;
6122                         goto out;
6123                 }
6124
6125                 if (msg->msg_iovlen == 0) {
6126                         break;
6127                 }
6128
6129                 mtu = socket_wrapper_mtu();
6130                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6131                         size_t nlen;
6132                         nlen = len + msg->msg_iov[i].iov_len;
6133                         if (nlen < len) {
6134                                 /* overflow */
6135                                 errno = EMSGSIZE;
6136                                 goto out;
6137                         }
6138                         if (nlen > mtu) {
6139                                 break;
6140                         }
6141                 }
6142                 msg->msg_iovlen = i;
6143                 if (msg->msg_iovlen == 0) {
6144                         *tmp_iov = msg->msg_iov[0];
6145                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6146                                                (size_t)mtu);
6147                         msg->msg_iov = tmp_iov;
6148                         msg->msg_iovlen = 1;
6149                 }
6150                 break;
6151         }
6152         case SOCK_DGRAM:
6153                 if (si->connected) {
6154                         if (msg->msg_name != NULL) {
6155                                 /*
6156                                  * We are dealing with unix sockets and if we
6157                                  * are connected, we should only talk to the
6158                                  * connected unix path. Using the fd to send
6159                                  * to another server would be hard to achieve.
6160                                  */
6161                                 msg->msg_name = NULL;
6162                                 msg->msg_namelen = 0;
6163                         }
6164                 } else {
6165                         const struct sockaddr *msg_name;
6166                         msg_name = (const struct sockaddr *)msg->msg_name;
6167
6168                         if (msg_name == NULL) {
6169                                 errno = ENOTCONN;
6170                                 goto out;
6171                         }
6172
6173
6174                         ret = sockaddr_convert_to_un(si, msg_name, msg->msg_namelen,
6175                                                      tmp_un, 0, bcast);
6176                         if (ret == -1) {
6177                                 goto out;
6178                         }
6179
6180                         if (to_un) {
6181                                 *to_un = tmp_un;
6182                         }
6183                         if (to) {
6184                                 *to = msg_name;
6185                         }
6186                         msg->msg_name = tmp_un;
6187                         msg->msg_namelen = sizeof(*tmp_un);
6188                 }
6189
6190                 if (si->bound == 0) {
6191                         ret = swrap_auto_bind(fd, si, si->family);
6192                         if (ret == -1) {
6193                                 SWRAP_UNLOCK_SI(si);
6194                                 if (errno == ENOTSOCK) {
6195                                         swrap_remove_stale(fd);
6196                                         ret = -ENOTSOCK;
6197                                 } else {
6198                                         SWRAP_LOG(SWRAP_LOG_ERROR, "swrap_sendmsg_before failed");
6199                                 }
6200                                 return ret;
6201                         }
6202                 }
6203
6204                 if (!si->defer_connect) {
6205                         break;
6206                 }
6207
6208                 ret = sockaddr_convert_to_un(si,
6209                                              &si->peername.sa.s,
6210                                              si->peername.sa_socklen,
6211                                              tmp_un,
6212                                              0,
6213                                              NULL);
6214                 if (ret == -1) {
6215                         goto out;
6216                 }
6217
6218                 ret = libc_connect(fd,
6219                                    (struct sockaddr *)(void *)tmp_un,
6220                                    sizeof(*tmp_un));
6221
6222                 /* to give better errors */
6223                 if (ret == -1 && errno == ENOENT) {
6224                         errno = EHOSTUNREACH;
6225                 }
6226
6227                 if (ret == -1) {
6228                         goto out;
6229                 }
6230
6231                 si->defer_connect = 0;
6232                 break;
6233         default:
6234                 errno = EHOSTUNREACH;
6235                 goto out;
6236         }
6237
6238         ret = 0;
6239 out:
6240         SWRAP_UNLOCK_SI(si);
6241
6242         return ret;
6243 }
6244
6245 static void swrap_sendmsg_after(int fd,
6246                                 struct socket_info *si,
6247                                 struct msghdr *msg,
6248                                 const struct sockaddr *to,
6249                                 ssize_t ret)
6250 {
6251         int saved_errno = errno;
6252         size_t i, len = 0;
6253         uint8_t *buf;
6254         off_t ofs = 0;
6255         size_t avail = 0;
6256         size_t remain;
6257
6258         /* to give better errors */
6259         if (ret == -1) {
6260                 if (saved_errno == ENOENT) {
6261                         saved_errno = EHOSTUNREACH;
6262                 } else if (saved_errno == ENOTSOCK) {
6263                         /* If the fd is not a socket, remove it */
6264                         swrap_remove_stale(fd);
6265                 }
6266         }
6267
6268         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6269                 avail += msg->msg_iov[i].iov_len;
6270         }
6271
6272         if (ret == -1) {
6273                 remain = MIN(80, avail);
6274         } else {
6275                 remain = ret;
6276         }
6277
6278         /* we capture it as one single packet */
6279         buf = (uint8_t *)malloc(remain);
6280         if (!buf) {
6281                 /* we just not capture the packet */
6282                 errno = saved_errno;
6283                 return;
6284         }
6285
6286         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6287                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6288                 memcpy(buf + ofs,
6289                        msg->msg_iov[i].iov_base,
6290                        this_time);
6291                 ofs += this_time;
6292                 remain -= this_time;
6293         }
6294         len = ofs;
6295
6296         SWRAP_LOCK_SI(si);
6297
6298         switch (si->type) {
6299         case SOCK_STREAM:
6300                 if (ret == -1) {
6301                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6302                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND_RST, NULL, 0);
6303                 } else {
6304                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6305                 }
6306                 break;
6307
6308         case SOCK_DGRAM:
6309                 if (si->connected) {
6310                         to = &si->peername.sa.s;
6311                 }
6312                 if (ret == -1) {
6313                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6314                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO_UNREACH, buf, len);
6315                 } else {
6316                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6317                 }
6318                 break;
6319         }
6320
6321         SWRAP_UNLOCK_SI(si);
6322
6323         free(buf);
6324         errno = saved_errno;
6325 }
6326
6327 static int swrap_recvmsg_before(int fd,
6328                                 struct socket_info *si,
6329                                 struct msghdr *msg,
6330                                 struct iovec *tmp_iov)
6331 {
6332         size_t i, len = 0;
6333         int ret = -1;
6334
6335         SWRAP_LOCK_SI(si);
6336
6337         (void)fd; /* unused */
6338
6339         switch (si->type) {
6340         case SOCK_STREAM: {
6341                 unsigned int mtu;
6342                 if (!si->connected) {
6343                         errno = ENOTCONN;
6344                         goto out;
6345                 }
6346
6347                 if (msg->msg_iovlen == 0) {
6348                         break;
6349                 }
6350
6351                 mtu = socket_wrapper_mtu();
6352                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6353                         size_t nlen;
6354                         nlen = len + msg->msg_iov[i].iov_len;
6355                         if (nlen > mtu) {
6356                                 break;
6357                         }
6358                 }
6359                 msg->msg_iovlen = i;
6360                 if (msg->msg_iovlen == 0) {
6361                         *tmp_iov = msg->msg_iov[0];
6362                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6363                                                (size_t)mtu);
6364                         msg->msg_iov = tmp_iov;
6365                         msg->msg_iovlen = 1;
6366                 }
6367                 break;
6368         }
6369         case SOCK_DGRAM:
6370                 if (msg->msg_name == NULL) {
6371                         errno = EINVAL;
6372                         goto out;
6373                 }
6374
6375                 if (msg->msg_iovlen == 0) {
6376                         break;
6377                 }
6378
6379                 if (si->bound == 0) {
6380                         ret = swrap_auto_bind(fd, si, si->family);
6381                         if (ret == -1) {
6382                                 SWRAP_UNLOCK_SI(si);
6383                                 /*
6384                                  * When attempting to read or write to a
6385                                  * descriptor, if an underlying autobind fails
6386                                  * because it's not a socket, stop intercepting
6387                                  * uses of that descriptor.
6388                                  */
6389                                 if (errno == ENOTSOCK) {
6390                                         swrap_remove_stale(fd);
6391                                         ret = -ENOTSOCK;
6392                                 } else {
6393                                         SWRAP_LOG(SWRAP_LOG_ERROR,
6394                                                   "swrap_recvmsg_before failed");
6395                                 }
6396                                 return ret;
6397                         }
6398                 }
6399                 break;
6400         default:
6401                 errno = EHOSTUNREACH;
6402                 goto out;
6403         }
6404
6405         ret = 0;
6406 out:
6407         SWRAP_UNLOCK_SI(si);
6408
6409         return ret;
6410 }
6411
6412 static int swrap_recvmsg_after(int fd,
6413                                struct socket_info *si,
6414                                struct msghdr *msg,
6415                                const struct sockaddr_un *un_addr,
6416                                socklen_t un_addrlen,
6417                                ssize_t ret)
6418 {
6419         int saved_errno = errno;
6420         size_t i;
6421         uint8_t *buf = NULL;
6422         off_t ofs = 0;
6423         size_t avail = 0;
6424         size_t remain;
6425         int rc;
6426
6427         /* to give better errors */
6428         if (ret == -1) {
6429                 if (saved_errno == ENOENT) {
6430                         saved_errno = EHOSTUNREACH;
6431                 } else if (saved_errno == ENOTSOCK) {
6432                         /* If the fd is not a socket, remove it */
6433                         swrap_remove_stale(fd);
6434                 }
6435         }
6436
6437         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6438                 avail += msg->msg_iov[i].iov_len;
6439         }
6440
6441         SWRAP_LOCK_SI(si);
6442
6443         /* Convert the socket address before we leave */
6444         if (si->type == SOCK_DGRAM && un_addr != NULL) {
6445                 rc = sockaddr_convert_from_un(si,
6446                                               un_addr,
6447                                               un_addrlen,
6448                                               si->family,
6449                                               msg->msg_name,
6450                                               &msg->msg_namelen);
6451                 if (rc == -1) {
6452                         goto done;
6453                 }
6454         }
6455
6456         if (avail == 0) {
6457                 rc = 0;
6458                 goto done;
6459         }
6460
6461         if (ret == -1) {
6462                 remain = MIN(80, avail);
6463         } else {
6464                 remain = ret;
6465         }
6466
6467         /* we capture it as one single packet */
6468         buf = (uint8_t *)malloc(remain);
6469         if (buf == NULL) {
6470                 /* we just not capture the packet */
6471                 SWRAP_UNLOCK_SI(si);
6472                 errno = saved_errno;
6473                 return -1;
6474         }
6475
6476         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6477                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6478                 memcpy(buf + ofs,
6479                        msg->msg_iov[i].iov_base,
6480                        this_time);
6481                 ofs += this_time;
6482                 remain -= this_time;
6483         }
6484
6485         switch (si->type) {
6486         case SOCK_STREAM:
6487                 if (ret == -1 && saved_errno != EAGAIN && saved_errno != ENOBUFS) {
6488                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6489                 } else if (ret == 0) { /* END OF FILE */
6490                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6491                 } else if (ret > 0) {
6492                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV, buf, ret);
6493                 }
6494                 break;
6495
6496         case SOCK_DGRAM:
6497                 if (ret == -1) {
6498                         break;
6499                 }
6500
6501                 if (un_addr != NULL) {
6502                         swrap_pcap_dump_packet(si,
6503                                           msg->msg_name,
6504                                           SWRAP_RECVFROM,
6505                                           buf,
6506                                           ret);
6507                 } else {
6508                         swrap_pcap_dump_packet(si,
6509                                           msg->msg_name,
6510                                           SWRAP_RECV,
6511                                           buf,
6512                                           ret);
6513                 }
6514
6515                 break;
6516         }
6517
6518         rc = 0;
6519 done:
6520         free(buf);
6521         errno = saved_errno;
6522
6523 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6524         if (rc == 0 &&
6525             msg->msg_controllen > 0 &&
6526             msg->msg_control != NULL) {
6527                 rc = swrap_msghdr_add_socket_info(si, msg);
6528                 if (rc < 0) {
6529                         SWRAP_UNLOCK_SI(si);
6530                         return -1;
6531                 }
6532         }
6533 #endif
6534
6535         SWRAP_UNLOCK_SI(si);
6536         return rc;
6537 }
6538
6539 /****************************************************************************
6540  *   RECVFROM
6541  ***************************************************************************/
6542
6543 static ssize_t swrap_recvfrom(int s, void *buf, size_t len, int flags,
6544                               struct sockaddr *from, socklen_t *fromlen)
6545 {
6546         struct swrap_address from_addr = {
6547                 .sa_socklen = sizeof(struct sockaddr_un),
6548         };
6549         ssize_t ret;
6550         struct socket_info *si = find_socket_info(s);
6551         struct swrap_address saddr = {
6552                 .sa_socklen = sizeof(struct sockaddr_storage),
6553         };
6554         struct msghdr msg;
6555         struct iovec tmp;
6556         int tret;
6557
6558         if (!si) {
6559                 return libc_recvfrom(s,
6560                                      buf,
6561                                      len,
6562                                      flags,
6563                                      from,
6564                                      fromlen);
6565         }
6566
6567         tmp.iov_base = buf;
6568         tmp.iov_len = len;
6569
6570         ZERO_STRUCT(msg);
6571         if (from != NULL && fromlen != NULL) {
6572                 msg.msg_name = from;   /* optional address */
6573                 msg.msg_namelen = *fromlen; /* size of address */
6574         } else {
6575                 msg.msg_name = &saddr.sa.s; /* optional address */
6576                 msg.msg_namelen = saddr.sa_socklen; /* size of address */
6577         }
6578         msg.msg_iov = &tmp;            /* scatter/gather array */
6579         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6580 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6581         msg.msg_control = NULL;        /* ancillary data, see below */
6582         msg.msg_controllen = 0;        /* ancillary data buffer len */
6583         msg.msg_flags = 0;             /* flags on received message */
6584 #endif
6585
6586         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6587         if (tret < 0) {
6588                 return -1;
6589         }
6590
6591         buf = msg.msg_iov[0].iov_base;
6592         len = msg.msg_iov[0].iov_len;
6593
6594         ret = libc_recvfrom(s,
6595                             buf,
6596                             len,
6597                             flags,
6598                             &from_addr.sa.s,
6599                             &from_addr.sa_socklen);
6600         if (ret == -1) {
6601                 return ret;
6602         }
6603
6604         tret = swrap_recvmsg_after(s,
6605                                    si,
6606                                    &msg,
6607                                    &from_addr.sa.un,
6608                                    from_addr.sa_socklen,
6609                                    ret);
6610         if (tret != 0) {
6611                 return tret;
6612         }
6613
6614         if (from != NULL && fromlen != NULL) {
6615                 *fromlen = msg.msg_namelen;
6616         }
6617
6618         return ret;
6619 }
6620
/* Public recvfrom() symbol, forwards to swrap_recvfrom(). */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
                 struct sockaddr *from, Psocklen_t fromlen)
#else
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
                 struct sockaddr *from, socklen_t *fromlen)
#endif
{
        socklen_t *plen = (socklen_t *)fromlen;

        return swrap_recvfrom(s, buf, len, flags, from, plen);
}
6631
6632 /****************************************************************************
6633  *   SENDTO
6634  ***************************************************************************/
6635
6636 static ssize_t swrap_sendto(int s, const void *buf, size_t len, int flags,
6637                             const struct sockaddr *to, socklen_t tolen)
6638 {
6639         struct msghdr msg;
6640         struct iovec tmp;
6641         struct swrap_address un_addr = {
6642                 .sa_socklen = sizeof(struct sockaddr_un),
6643         };
6644         const struct sockaddr_un *to_un = NULL;
6645         ssize_t ret;
6646         int rc;
6647         struct socket_info *si = find_socket_info(s);
6648         int bcast = 0;
6649
6650         if (!si) {
6651                 return libc_sendto(s, buf, len, flags, to, tolen);
6652         }
6653
6654         tmp.iov_base = discard_const_p(char, buf);
6655         tmp.iov_len = len;
6656
6657         ZERO_STRUCT(msg);
6658         msg.msg_name = discard_const_p(struct sockaddr, to); /* optional address */
6659         msg.msg_namelen = tolen;       /* size of address */
6660         msg.msg_iov = &tmp;            /* scatter/gather array */
6661         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6662 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6663         msg.msg_control = NULL;        /* ancillary data, see below */
6664         msg.msg_controllen = 0;        /* ancillary data buffer len */
6665         msg.msg_flags = 0;             /* flags on received message */
6666 #endif
6667
6668         rc = swrap_sendmsg_before(s,
6669                                   si,
6670                                   &msg,
6671                                   &tmp,
6672                                   &un_addr.sa.un,
6673                                   &to_un,
6674                                   &to,
6675                                   &bcast);
6676         if (rc < 0) {
6677                 return -1;
6678         }
6679
6680         buf = msg.msg_iov[0].iov_base;
6681         len = msg.msg_iov[0].iov_len;
6682
6683         if (bcast) {
6684                 struct stat st;
6685                 unsigned int iface;
6686                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
6687                 char type;
6688                 char *swrap_dir = NULL;
6689
6690                 type = SOCKET_TYPE_CHAR_UDP;
6691
6692                 swrap_dir = socket_wrapper_dir();
6693                 if (swrap_dir == NULL) {
6694                         return -1;
6695                 }
6696
6697                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
6698                         swrap_un_path(&un_addr.sa.un,
6699                                       swrap_dir,
6700                                       type,
6701                                       iface,
6702                                       prt);
6703                         if (stat(un_addr.sa.un.sun_path, &st) != 0) continue;
6704
6705                         /* ignore the any errors in broadcast sends */
6706                         libc_sendto(s,
6707                                     buf,
6708                                     len,
6709                                     flags,
6710                                     &un_addr.sa.s,
6711                                     un_addr.sa_socklen);
6712                 }
6713
6714                 SAFE_FREE(swrap_dir);
6715
6716                 SWRAP_LOCK_SI(si);
6717
6718                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6719
6720                 SWRAP_UNLOCK_SI(si);
6721
6722                 return len;
6723         }
6724
6725         SWRAP_LOCK_SI(si);
6726         /*
6727          * If it is a dgram socket and we are connected, don't include the
6728          * 'to' address.
6729          */
6730         if (si->type == SOCK_DGRAM && si->connected) {
6731                 ret = libc_sendto(s,
6732                                   buf,
6733                                   len,
6734                                   flags,
6735                                   NULL,
6736                                   0);
6737         } else {
6738                 ret = libc_sendto(s,
6739                                   buf,
6740                                   len,
6741                                   flags,
6742                                   (struct sockaddr *)msg.msg_name,
6743                                   msg.msg_namelen);
6744         }
6745
6746         SWRAP_UNLOCK_SI(si);
6747
6748         swrap_sendmsg_after(s, si, &msg, to, ret);
6749
6750         return ret;
6751 }
6752
/* Public sendto() symbol, forwards to swrap_sendto(). */
ssize_t sendto(int s, const void *buf, size_t len, int flags,
               const struct sockaddr *to, socklen_t tolen)
{
        ssize_t nsent = swrap_sendto(s, buf, len, flags, to, tolen);

        return nsent;
}
6758
/****************************************************************************
 *   RECV
 ***************************************************************************/
6762
6763 static ssize_t swrap_recv(int s, void *buf, size_t len, int flags)
6764 {
6765         struct socket_info *si;
6766         struct msghdr msg;
6767         struct swrap_address saddr = {
6768                 .sa_socklen = sizeof(struct sockaddr_storage),
6769         };
6770         struct iovec tmp;
6771         ssize_t ret;
6772         int tret;
6773
6774         si = find_socket_info(s);
6775         if (si == NULL) {
6776                 return libc_recv(s, buf, len, flags);
6777         }
6778
6779         tmp.iov_base = buf;
6780         tmp.iov_len = len;
6781
6782         ZERO_STRUCT(msg);
6783         msg.msg_name = &saddr.sa.s;    /* optional address */
6784         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6785         msg.msg_iov = &tmp;            /* scatter/gather array */
6786         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6787 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6788         msg.msg_control = NULL;        /* ancillary data, see below */
6789         msg.msg_controllen = 0;        /* ancillary data buffer len */
6790         msg.msg_flags = 0;             /* flags on received message */
6791 #endif
6792
6793         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6794         if (tret < 0) {
6795                 return -1;
6796         }
6797
6798         buf = msg.msg_iov[0].iov_base;
6799         len = msg.msg_iov[0].iov_len;
6800
6801         ret = libc_recv(s, buf, len, flags);
6802
6803         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6804         if (tret != 0) {
6805                 return tret;
6806         }
6807
6808         return ret;
6809 }
6810
/* Public recv() symbol, forwards to swrap_recv(). */
ssize_t recv(int s, void *buf, size_t len, int flags)
{
        ssize_t nread = swrap_recv(s, buf, len, flags);

        return nread;
}
6815
6816 /****************************************************************************
6817  *   READ
6818  ***************************************************************************/
6819
6820 static ssize_t swrap_read(int s, void *buf, size_t len)
6821 {
6822         struct socket_info *si;
6823         struct msghdr msg;
6824         struct iovec tmp;
6825         struct swrap_address saddr = {
6826                 .sa_socklen = sizeof(struct sockaddr_storage),
6827         };
6828         ssize_t ret;
6829         int tret;
6830
6831         si = find_socket_info(s);
6832         if (si == NULL) {
6833                 return libc_read(s, buf, len);
6834         }
6835
6836         tmp.iov_base = buf;
6837         tmp.iov_len = len;
6838
6839         ZERO_STRUCT(msg);
6840         msg.msg_name = &saddr.sa.ss;   /* optional address */
6841         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6842         msg.msg_iov = &tmp;            /* scatter/gather array */
6843         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6844 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6845         msg.msg_control = NULL;        /* ancillary data, see below */
6846         msg.msg_controllen = 0;        /* ancillary data buffer len */
6847         msg.msg_flags = 0;             /* flags on received message */
6848 #endif
6849
6850         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6851         if (tret < 0) {
6852                 if (tret == -ENOTSOCK) {
6853                         return libc_read(s, buf, len);
6854                 }
6855                 return -1;
6856         }
6857
6858         buf = msg.msg_iov[0].iov_base;
6859         len = msg.msg_iov[0].iov_len;
6860
6861         ret = libc_read(s, buf, len);
6862
6863         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6864         if (tret != 0) {
6865                 return tret;
6866         }
6867
6868         return ret;
6869 }
6870
/* Public read() symbol, forwards to swrap_read(). */
ssize_t read(int s, void *buf, size_t len)
{
        ssize_t nread = swrap_read(s, buf, len);

        return nread;
}
6875
6876 /****************************************************************************
6877  *   WRITE
6878  ***************************************************************************/
6879
6880 static ssize_t swrap_write(int s, const void *buf, size_t len)
6881 {
6882         struct msghdr msg;
6883         struct iovec tmp;
6884         struct sockaddr_un un_addr;
6885         ssize_t ret;
6886         int rc;
6887         struct socket_info *si;
6888
6889         si = find_socket_info(s);
6890         if (si == NULL) {
6891                 return libc_write(s, buf, len);
6892         }
6893
6894         tmp.iov_base = discard_const_p(char, buf);
6895         tmp.iov_len = len;
6896
6897         ZERO_STRUCT(msg);
6898         msg.msg_name = NULL;           /* optional address */
6899         msg.msg_namelen = 0;           /* size of address */
6900         msg.msg_iov = &tmp;            /* scatter/gather array */
6901         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6902 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6903         msg.msg_control = NULL;        /* ancillary data, see below */
6904         msg.msg_controllen = 0;        /* ancillary data buffer len */
6905         msg.msg_flags = 0;             /* flags on received message */
6906 #endif
6907
6908         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
6909         if (rc < 0) {
6910                 return -1;
6911         }
6912
6913         buf = msg.msg_iov[0].iov_base;
6914         len = msg.msg_iov[0].iov_len;
6915
6916         ret = libc_write(s, buf, len);
6917
6918         swrap_sendmsg_after(s, si, &msg, NULL, ret);
6919
6920         return ret;
6921 }
6922
/* Public write() symbol, forwards to swrap_write(). */
ssize_t write(int s, const void *buf, size_t len)
{
        ssize_t nwritten = swrap_write(s, buf, len);

        return nwritten;
}
6927
6928 /****************************************************************************
6929  *   SEND
6930  ***************************************************************************/
6931
6932 static ssize_t swrap_send(int s, const void *buf, size_t len, int flags)
6933 {
6934         struct msghdr msg;
6935         struct iovec tmp;
6936         struct sockaddr_un un_addr;
6937         ssize_t ret;
6938         int rc;
6939         struct socket_info *si = find_socket_info(s);
6940
6941         if (!si) {
6942                 return libc_send(s, buf, len, flags);
6943         }
6944
6945         tmp.iov_base = discard_const_p(char, buf);
6946         tmp.iov_len = len;
6947
6948         ZERO_STRUCT(msg);
6949         msg.msg_name = NULL;           /* optional address */
6950         msg.msg_namelen = 0;           /* size of address */
6951         msg.msg_iov = &tmp;            /* scatter/gather array */
6952         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6953 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6954         msg.msg_control = NULL;        /* ancillary data, see below */
6955         msg.msg_controllen = 0;        /* ancillary data buffer len */
6956         msg.msg_flags = 0;             /* flags on received message */
6957 #endif
6958
6959         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
6960         if (rc < 0) {
6961                 return -1;
6962         }
6963
6964         buf = msg.msg_iov[0].iov_base;
6965         len = msg.msg_iov[0].iov_len;
6966
6967         ret = libc_send(s, buf, len, flags);
6968
6969         swrap_sendmsg_after(s, si, &msg, NULL, ret);
6970
6971         return ret;
6972 }
6973
/* Public send() symbol, forwards to swrap_send(). */
ssize_t send(int s, const void *buf, size_t len, int flags)
{
        ssize_t nsent = swrap_send(s, buf, len, flags);

        return nsent;
}
6978
6979 /****************************************************************************
6980  *   RECVMSG
6981  ***************************************************************************/
6982
6983 static ssize_t swrap_recvmsg(int s, struct msghdr *omsg, int flags)
6984 {
6985         struct swrap_address from_addr = {
6986                 .sa_socklen = sizeof(struct sockaddr_un),
6987         };
6988         struct swrap_address convert_addr = {
6989                 .sa_socklen = sizeof(struct sockaddr_storage),
6990         };
6991         struct socket_info *si;
6992         struct msghdr msg;
6993         struct iovec tmp;
6994 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6995         size_t msg_ctrllen_filled;
6996         size_t msg_ctrllen_left;
6997 #endif
6998
6999         ssize_t ret;
7000         int rc;
7001
7002         si = find_socket_info(s);
7003         if (si == NULL) {
7004                 uint8_t *tmp_control = NULL;
7005                 rc = swrap_recvmsg_before_unix(omsg, &msg, &tmp_control);
7006                 if (rc < 0) {
7007                         return rc;
7008                 }
7009                 ret = libc_recvmsg(s, &msg, flags);
7010                 return swrap_recvmsg_after_unix(&msg, &tmp_control, omsg, ret);
7011         }
7012
7013         tmp.iov_base = NULL;
7014         tmp.iov_len = 0;
7015
7016         ZERO_STRUCT(msg);
7017         msg.msg_name = &from_addr.sa;              /* optional address */
7018         msg.msg_namelen = from_addr.sa_socklen;    /* size of address */
7019         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7020         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7021 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7022         msg_ctrllen_filled = 0;
7023         msg_ctrllen_left = omsg->msg_controllen;
7024
7025         msg.msg_control = omsg->msg_control;       /* ancillary data, see below */
7026         msg.msg_controllen = omsg->msg_controllen; /* ancillary data buffer len */
7027         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7028 #endif
7029
7030         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7031         if (rc < 0) {
7032                 return -1;
7033         }
7034
7035         ret = libc_recvmsg(s, &msg, flags);
7036
7037 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7038         msg_ctrllen_filled += msg.msg_controllen;
7039         msg_ctrllen_left -= msg.msg_controllen;
7040
7041         if (omsg->msg_control != NULL) {
7042                 uint8_t *p;
7043
7044                 p = omsg->msg_control;
7045                 p += msg_ctrllen_filled;
7046
7047                 msg.msg_control = p;
7048                 msg.msg_controllen = msg_ctrllen_left;
7049         } else {
7050                 msg.msg_control = NULL;
7051                 msg.msg_controllen = 0;
7052         }
7053 #endif
7054
7055         /*
7056          * We convert the unix address to a IP address so we need a buffer
7057          * which can store the address in case of SOCK_DGRAM, see below.
7058          */
7059         msg.msg_name = &convert_addr.sa;
7060         msg.msg_namelen = convert_addr.sa_socklen;
7061
7062         rc = swrap_recvmsg_after(s,
7063                                  si,
7064                                  &msg,
7065                                  &from_addr.sa.un,
7066                                  from_addr.sa_socklen,
7067                                  ret);
7068         if (rc != 0) {
7069                 return rc;
7070         }
7071
7072 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7073         if (omsg->msg_control != NULL) {
7074                 /* msg.msg_controllen = space left */
7075                 msg_ctrllen_left = msg.msg_controllen;
7076                 msg_ctrllen_filled = omsg->msg_controllen - msg_ctrllen_left;
7077         }
7078
7079         /* Update the original message length */
7080         omsg->msg_controllen = msg_ctrllen_filled;
7081         omsg->msg_flags = msg.msg_flags;
7082 #endif
7083         omsg->msg_iovlen = msg.msg_iovlen;
7084
7085         SWRAP_LOCK_SI(si);
7086
7087         /*
7088          * From the manpage:
7089          *
7090          * The  msg_name  field  points  to a caller-allocated buffer that is
7091          * used to return the source address if the socket is unconnected.  The
7092          * caller should set msg_namelen to the size of this buffer before this
7093          * call; upon return from a successful call, msg_name will contain the
7094          * length of the returned address.  If the application  does  not  need
7095          * to know the source address, msg_name can be specified as NULL.
7096          */
7097         if (si->type == SOCK_STREAM) {
7098                 omsg->msg_namelen = 0;
7099         } else if (omsg->msg_name != NULL &&
7100                    omsg->msg_namelen != 0 &&
7101                    omsg->msg_namelen >= msg.msg_namelen) {
7102                 memcpy(omsg->msg_name, msg.msg_name, msg.msg_namelen);
7103                 omsg->msg_namelen = msg.msg_namelen;
7104         }
7105
7106         SWRAP_UNLOCK_SI(si);
7107
7108         return ret;
7109 }
7110
/* Public recvmsg() symbol, forwards to swrap_recvmsg(). */
ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)
{
        ssize_t nread = swrap_recvmsg(sockfd, msg, flags);

        return nread;
}
7115
7116 /****************************************************************************
7117  *   SENDMSG
7118  ***************************************************************************/
7119
7120 static ssize_t swrap_sendmsg(int s, const struct msghdr *omsg, int flags)
7121 {
7122         struct msghdr msg;
7123         struct iovec tmp;
7124         struct sockaddr_un un_addr;
7125         const struct sockaddr_un *to_un = NULL;
7126         const struct sockaddr *to = NULL;
7127         ssize_t ret;
7128         int rc;
7129         struct socket_info *si = find_socket_info(s);
7130         int bcast = 0;
7131
7132         if (!si) {
7133                 int scm_rights_pipe_fd = -1;
7134
7135                 rc = swrap_sendmsg_before_unix(omsg, &msg,
7136                                                &scm_rights_pipe_fd);
7137                 if (rc < 0) {
7138                         return rc;
7139                 }
7140                 ret = libc_sendmsg(s, &msg, flags);
7141                 return swrap_sendmsg_after_unix(&msg, ret, scm_rights_pipe_fd);
7142         }
7143
7144         ZERO_STRUCT(un_addr);
7145
7146         tmp.iov_base = NULL;
7147         tmp.iov_len = 0;
7148
7149         ZERO_STRUCT(msg);
7150
7151         SWRAP_LOCK_SI(si);
7152
7153         if (si->connected == 0) {
7154                 msg.msg_name = omsg->msg_name;             /* optional address */
7155                 msg.msg_namelen = omsg->msg_namelen;       /* size of address */
7156         }
7157         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7158         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7159
7160         SWRAP_UNLOCK_SI(si);
7161
7162 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7163         if (omsg != NULL && omsg->msg_controllen > 0 && omsg->msg_control != NULL) {
7164                 uint8_t *cmbuf = NULL;
7165                 size_t cmlen = 0;
7166
7167                 rc = swrap_sendmsg_filter_cmsghdr(omsg, &cmbuf, &cmlen);
7168                 if (rc < 0) {
7169                         return rc;
7170                 }
7171
7172                 if (cmlen == 0) {
7173                         msg.msg_controllen = 0;
7174                         msg.msg_control = NULL;
7175                 } else {
7176                         msg.msg_control = cmbuf;
7177                         msg.msg_controllen = cmlen;
7178                 }
7179         }
7180         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7181 #endif
7182         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, &to_un, &to, &bcast);
7183         if (rc < 0) {
7184                 int saved_errno = errno;
7185 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7186                 SAFE_FREE(msg.msg_control);
7187 #endif
7188                 errno = saved_errno;
7189                 return -1;
7190         }
7191
7192         if (bcast) {
7193                 struct stat st;
7194                 unsigned int iface;
7195                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
7196                 char type;
7197                 size_t i, len = 0;
7198                 uint8_t *buf;
7199                 off_t ofs = 0;
7200                 size_t avail = 0;
7201                 size_t remain;
7202                 char *swrap_dir = NULL;
7203
7204                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7205                         avail += msg.msg_iov[i].iov_len;
7206                 }
7207
7208                 len = avail;
7209                 remain = avail;
7210
7211                 /* we capture it as one single packet */
7212                 buf = (uint8_t *)malloc(remain);
7213                 if (!buf) {
7214                         int saved_errno = errno;
7215 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7216                         SAFE_FREE(msg.msg_control);
7217 #endif
7218                         errno = saved_errno;
7219                         return -1;
7220                 }
7221
7222                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7223                         size_t this_time = MIN(remain, (size_t)msg.msg_iov[i].iov_len);
7224                         memcpy(buf + ofs,
7225                                msg.msg_iov[i].iov_base,
7226                                this_time);
7227                         ofs += this_time;
7228                         remain -= this_time;
7229                 }
7230
7231                 type = SOCKET_TYPE_CHAR_UDP;
7232
7233                 swrap_dir = socket_wrapper_dir();
7234                 if (swrap_dir == NULL) {
7235                         int saved_errno = errno;
7236 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7237                         SAFE_FREE(msg.msg_control);
7238 #endif
7239                         SAFE_FREE(buf);
7240                         errno = saved_errno;
7241                         return -1;
7242                 }
7243
7244                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
7245                         swrap_un_path(&un_addr, swrap_dir, type, iface, prt);
7246                         if (stat(un_addr.sun_path, &st) != 0) continue;
7247
7248                         msg.msg_name = &un_addr;           /* optional address */
7249                         msg.msg_namelen = sizeof(un_addr); /* size of address */
7250
7251                         /* ignore the any errors in broadcast sends */
7252                         libc_sendmsg(s, &msg, flags);
7253                 }
7254
7255                 SAFE_FREE(swrap_dir);
7256
7257                 SWRAP_LOCK_SI(si);
7258
7259                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
7260                 free(buf);
7261
7262                 SWRAP_UNLOCK_SI(si);
7263
7264                 return len;
7265         }
7266
7267         ret = libc_sendmsg(s, &msg, flags);
7268
7269         swrap_sendmsg_after(s, si, &msg, to, ret);
7270
7271 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7272         {
7273                 int saved_errno = errno;
7274                 SAFE_FREE(msg.msg_control);
7275                 errno = saved_errno;
7276         }
7277 #endif
7278
7279         return ret;
7280 }
7281
/*
 * sendmsg() symbol provided by this library.
 *
 * The arguments are handed unchanged to swrap_sendmsg() and its result
 * is returned to the caller.
 */
ssize_t sendmsg(int s, const struct msghdr *omsg, int flags)
{
        ssize_t nsent;

        nsent = swrap_sendmsg(s, omsg, flags);

        return nsent;
}
7286
7287 /****************************************************************************
7288  *   READV
7289  ***************************************************************************/
7290
7291 static ssize_t swrap_readv(int s, const struct iovec *vector, int count)
7292 {
7293         struct socket_info *si;
7294         struct msghdr msg;
7295         struct iovec tmp;
7296         struct swrap_address saddr = {
7297                 .sa_socklen = sizeof(struct sockaddr_storage)
7298         };
7299         ssize_t ret;
7300         int rc;
7301
7302         si = find_socket_info(s);
7303         if (si == NULL) {
7304                 return libc_readv(s, vector, count);
7305         }
7306
7307         tmp.iov_base = NULL;
7308         tmp.iov_len = 0;
7309
7310         ZERO_STRUCT(msg);
7311         msg.msg_name = &saddr.sa.s; /* optional address */
7312         msg.msg_namelen = saddr.sa_socklen;      /* size of address */
7313         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7314         msg.msg_iovlen = count;        /* # elements in msg_iov */
7315 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7316         msg.msg_control = NULL;        /* ancillary data, see below */
7317         msg.msg_controllen = 0;        /* ancillary data buffer len */
7318         msg.msg_flags = 0;             /* flags on received message */
7319 #endif
7320
7321         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7322         if (rc < 0) {
7323                 if (rc == -ENOTSOCK) {
7324                         return libc_readv(s, vector, count);
7325                 }
7326                 return -1;
7327         }
7328
7329         ret = libc_readv(s, msg.msg_iov, msg.msg_iovlen);
7330
7331         rc = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
7332         if (rc != 0) {
7333                 return rc;
7334         }
7335
7336         return ret;
7337 }
7338
/*
 * readv() symbol provided by this library.
 *
 * The arguments are handed unchanged to swrap_readv() and its result
 * is returned to the caller.
 */
ssize_t readv(int s, const struct iovec *vector, int count)
{
        ssize_t nread;

        nread = swrap_readv(s, vector, count);

        return nread;
}
7343
7344 /****************************************************************************
7345  *   WRITEV
7346  ***************************************************************************/
7347
7348 static ssize_t swrap_writev(int s, const struct iovec *vector, int count)
7349 {
7350         struct msghdr msg;
7351         struct iovec tmp;
7352         struct sockaddr_un un_addr;
7353         ssize_t ret;
7354         int rc;
7355         struct socket_info *si = find_socket_info(s);
7356
7357         if (!si) {
7358                 return libc_writev(s, vector, count);
7359         }
7360
7361         tmp.iov_base = NULL;
7362         tmp.iov_len = 0;
7363
7364         ZERO_STRUCT(msg);
7365         msg.msg_name = NULL;           /* optional address */
7366         msg.msg_namelen = 0;           /* size of address */
7367         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7368         msg.msg_iovlen = count;        /* # elements in msg_iov */
7369 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7370         msg.msg_control = NULL;        /* ancillary data, see below */
7371         msg.msg_controllen = 0;        /* ancillary data buffer len */
7372         msg.msg_flags = 0;             /* flags on received message */
7373 #endif
7374
7375         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
7376         if (rc < 0) {
7377                 if (rc == -ENOTSOCK) {
7378                         return libc_readv(s, vector, count);
7379                 }
7380                 return -1;
7381         }
7382
7383         ret = libc_writev(s, msg.msg_iov, msg.msg_iovlen);
7384
7385         swrap_sendmsg_after(s, si, &msg, NULL, ret);
7386
7387         return ret;
7388 }
7389
/*
 * writev() symbol provided by this library.
 *
 * The arguments are handed unchanged to swrap_writev() and its result
 * is returned to the caller.
 */
ssize_t writev(int s, const struct iovec *vector, int count)
{
        ssize_t nwritten;

        nwritten = swrap_writev(s, vector, count);

        return nwritten;
}
7394
7395 /****************************
7396  * CLOSE
7397  ***************************/
7398
7399 static int swrap_remove_wrapper(const char *__func_name,
7400                                 int (*__close_fd_fn)(int fd),
7401                                 int fd)
7402 {
7403         struct socket_info *si = NULL;
7404         int si_index;
7405         int ret_errno = errno;
7406         int ret;
7407
7408         swrap_mutex_lock(&socket_reset_mutex);
7409
7410         si_index = find_socket_info_index(fd);
7411         if (si_index == -1) {
7412                 swrap_mutex_unlock(&socket_reset_mutex);
7413                 return __close_fd_fn(fd);
7414         }
7415
7416         swrap_log(SWRAP_LOG_TRACE, __func_name, "Remove wrapper for fd=%d", fd);
7417         reset_socket_info_index(fd);
7418
7419         si = swrap_get_socket_info(si_index);
7420
7421         swrap_mutex_lock(&first_free_mutex);
7422         SWRAP_LOCK_SI(si);
7423
7424         ret = __close_fd_fn(fd);
7425         if (ret == -1) {
7426                 ret_errno = errno;
7427         }
7428
7429         swrap_dec_refcount(si);
7430
7431         if (swrap_get_refcount(si) > 0) {
7432                 /* there are still references left */
7433                 goto out;
7434         }
7435
7436         if (si->fd_passed) {
7437                 goto set_next_free;
7438         }
7439
7440         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
7441                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_SEND, NULL, 0);
7442         }
7443
7444         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
7445                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_RECV, NULL, 0);
7446                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_ACK, NULL, 0);
7447         }
7448
7449         if (si->un_addr.sun_path[0] != '\0') {
7450                 unlink(si->un_addr.sun_path);
7451         }
7452
7453 set_next_free:
7454         swrap_set_next_free(si, first_free);
7455         first_free = si_index;
7456
7457 out:
7458         SWRAP_UNLOCK_SI(si);
7459         swrap_mutex_unlock(&first_free_mutex);
7460         swrap_mutex_unlock(&socket_reset_mutex);
7461
7462         errno = ret_errno;
7463         return ret;
7464 }
7465
/*
 * Close callback that leaves the file descriptor untouched.
 *
 * It is passed to swrap_remove_wrapper() when only the bookkeeping of
 * an fd has to be dropped. Always returns 0.
 */
static int swrap_noop_close(int fd)
{
        const int success = 0;

        (void)fd; /* unused */

        return success;
}
7471
7472 static void swrap_remove_stale(int fd)
7473 {
7474         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
7475 }
7476
7477 /*
7478  * This allows socket_wrapper aware applications to
7479  * indicate that the given fd does not belong to
7480  * an inet socket.
7481  *
7482  * We already overload a lot of unrelated functions
7483  * like eventfd(), timerfd_create(), ... in order to
7484  * call swrap_remove_stale() on the returned fd, but
7485  * we'll never be able to handle all possible syscalls.
7486  *
7487  * socket_wrapper_indicate_no_inet_fd() gives them a way
7488  * to do the same.
7489  *
7490  * We don't export swrap_remove_stale() in order to
7491  * make it easier to analyze SOCKET_WRAPPER_DEBUGLEVEL=3
7492  * log files.
7493  */
7494 void socket_wrapper_indicate_no_inet_fd(int fd)
7495 {
7496         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
7497 }
7498
7499 static int swrap_close(int fd)
7500 {
7501         return swrap_remove_wrapper(__func__, libc_close, fd);
7502 }
7503
/*
 * close() symbol provided by this library.
 *
 * The descriptor is handed to swrap_close() and its result is returned
 * to the caller.
 */
int close(int fd)
{
        int rc;

        rc = swrap_close(fd);

        return rc;
}
7508
7509 #ifdef HAVE___CLOSE_NOCANCEL
7510
7511 static int swrap___close_nocancel(int fd)
7512 {
7513         return swrap_remove_wrapper(__func__, libc___close_nocancel, fd);
7514 }
7515
/*
 * __close_nocancel() symbol provided by this library. The prototype is
 * declared right here, directly in front of the definition.
 *
 * The descriptor is handed to swrap___close_nocancel() and its result
 * is returned to the caller.
 */
int __close_nocancel(int fd);
int __close_nocancel(int fd)
{
        int rc;

        rc = swrap___close_nocancel(fd);

        return rc;
}
7521
7522 #endif /* HAVE___CLOSE_NOCANCEL */
7523
7524 /****************************
7525  * DUP
7526  ***************************/
7527
7528 static int swrap_dup(int fd)
7529 {
7530         struct socket_info *si;
7531         int dup_fd, idx;
7532
7533         idx = find_socket_info_index(fd);
7534         if (idx == -1) {
7535                 return libc_dup(fd);
7536         }
7537
7538         si = swrap_get_socket_info(idx);
7539
7540         dup_fd = libc_dup(fd);
7541         if (dup_fd == -1) {
7542                 int saved_errno = errno;
7543                 errno = saved_errno;
7544                 return -1;
7545         }
7546
7547         if ((size_t)dup_fd >= socket_fds_max) {
7548                 SWRAP_LOG(SWRAP_LOG_ERROR,
7549                           "The max socket index limit of %zu has been reached, "
7550                           "trying to add %d",
7551                           socket_fds_max,
7552                           dup_fd);
7553                 libc_close(dup_fd);
7554                 errno = EMFILE;
7555                 return -1;
7556         }
7557
7558         SWRAP_LOCK_SI(si);
7559
7560         swrap_inc_refcount(si);
7561
7562         SWRAP_UNLOCK_SI(si);
7563
7564         /* Make sure we don't have an entry for the fd */
7565         swrap_remove_stale(dup_fd);
7566
7567         set_socket_info_index(dup_fd, idx);
7568
7569         return dup_fd;
7570 }
7571
/*
 * dup() symbol provided by this library.
 *
 * The descriptor is handed to swrap_dup() and its result is returned
 * to the caller.
 */
int dup(int fd)
{
        int new_fd;

        new_fd = swrap_dup(fd);

        return new_fd;
}
7576
7577 /****************************
7578  * DUP2
7579  ***************************/
7580
7581 static int swrap_dup2(int fd, int newfd)
7582 {
7583         struct socket_info *si;
7584         int dup_fd, idx;
7585
7586         idx = find_socket_info_index(fd);
7587         if (idx == -1) {
7588                 return libc_dup2(fd, newfd);
7589         }
7590
7591         si = swrap_get_socket_info(idx);
7592
7593         if (fd == newfd) {
7594                 /*
7595                  * According to the manpage:
7596                  *
7597                  * "If oldfd is a valid file descriptor, and newfd has the same
7598                  * value as oldfd, then dup2() does nothing, and returns newfd."
7599                  */
7600                 return newfd;
7601         }
7602
7603         if ((size_t)newfd >= socket_fds_max) {
7604                 SWRAP_LOG(SWRAP_LOG_ERROR,
7605                           "The max socket index limit of %zu has been reached, "
7606                           "trying to add %d",
7607                           socket_fds_max,
7608                           newfd);
7609                 errno = EMFILE;
7610                 return -1;
7611         }
7612
7613         if (find_socket_info(newfd)) {
7614                 /* dup2() does an implicit close of newfd, which we
7615                  * need to emulate */
7616                 swrap_close(newfd);
7617         }
7618
7619         dup_fd = libc_dup2(fd, newfd);
7620         if (dup_fd == -1) {
7621                 int saved_errno = errno;
7622                 errno = saved_errno;
7623                 return -1;
7624         }
7625
7626         SWRAP_LOCK_SI(si);
7627
7628         swrap_inc_refcount(si);
7629
7630         SWRAP_UNLOCK_SI(si);
7631
7632         /* Make sure we don't have an entry for the fd */
7633         swrap_remove_stale(dup_fd);
7634
7635         set_socket_info_index(dup_fd, idx);
7636
7637         return dup_fd;
7638 }
7639
/*
 * dup2() symbol provided by this library.
 *
 * Both descriptors are handed to swrap_dup2() and its result is
 * returned to the caller.
 */
int dup2(int fd, int newfd)
{
        int new_fd;

        new_fd = swrap_dup2(fd, newfd);

        return new_fd;
}
7644
7645 /****************************
7646  * FCNTL
7647  ***************************/
7648
7649 static int swrap_vfcntl(int fd, int cmd, va_list va)
7650 {
7651         struct socket_info *si;
7652         int rc, dup_fd, idx;
7653
7654         idx = find_socket_info_index(fd);
7655         if (idx == -1) {
7656                 return libc_vfcntl(fd, cmd, va);
7657         }
7658
7659         si = swrap_get_socket_info(idx);
7660
7661         switch (cmd) {
7662         case F_DUPFD:
7663                 dup_fd = libc_vfcntl(fd, cmd, va);
7664                 if (dup_fd == -1) {
7665                         int saved_errno = errno;
7666                         errno = saved_errno;
7667                         return -1;
7668                 }
7669
7670                 /* Make sure we don't have an entry for the fd */
7671                 swrap_remove_stale(dup_fd);
7672
7673                 if ((size_t)dup_fd >= socket_fds_max) {
7674                         SWRAP_LOG(SWRAP_LOG_ERROR,
7675                           "The max socket index limit of %zu has been reached, "
7676                           "trying to add %d",
7677                           socket_fds_max,
7678                           dup_fd);
7679                         libc_close(dup_fd);
7680                         errno = EMFILE;
7681                         return -1;
7682                 }
7683
7684                 SWRAP_LOCK_SI(si);
7685
7686                 swrap_inc_refcount(si);
7687
7688                 SWRAP_UNLOCK_SI(si);
7689
7690
7691                 set_socket_info_index(dup_fd, idx);
7692
7693                 rc = dup_fd;
7694                 break;
7695         default:
7696                 rc = libc_vfcntl(fd, cmd, va);
7697                 break;
7698         }
7699
7700         return rc;
7701 }
7702
/*
 * fcntl() symbol provided by this library.
 *
 * The variable arguments are collected into a va_list and handed to
 * swrap_vfcntl(), whose result is returned to the caller.
 */
int fcntl(int fd, int cmd, ...)
{
        va_list ap;
        int result;

        va_start(ap, cmd);
        result = swrap_vfcntl(fd, cmd, ap);
        va_end(ap);

        return result;
}
7716
7717 /****************************
7718  * EVENTFD
7719  ***************************/
7720
7721 #ifdef HAVE_EVENTFD
/*
 * eventfd() implementation: creates the descriptor with libc_eventfd()
 * and, on success, removes any stale wrapper entry for the returned fd.
 *
 * Returns the result of libc_eventfd().
 */
static int swrap_eventfd(int count, int flags)
{
        int fd = libc_eventfd(count, flags);

        if (fd == -1) {
                return fd;
        }

        swrap_remove_stale(fd);

        return fd;
}
7733
/*
 * eventfd() symbol provided by this library. The type of the count
 * parameter is selected by HAVE_EVENTFD_UNSIGNED_INT.
 *
 * The arguments are handed to swrap_eventfd() and its result is
 * returned to the caller.
 */
#ifdef HAVE_EVENTFD_UNSIGNED_INT
int eventfd(unsigned int count, int flags)
#else
int eventfd(int count, int flags)
#endif
{
        int fd;

        fd = swrap_eventfd(count, flags);

        return fd;
}
7742 #endif
7743
7744 #ifdef HAVE_PLEDGE
/*
 * pledge() replacement that ignores both arguments and always reports
 * success by returning 0.
 */
int pledge(const char *promises, const char *paths[])
{
        const int success = 0;

        (void)promises; /* unused */
        (void)paths; /* unused */

        return success;
}
7752 #endif /* HAVE_PLEDGE */
7753
7754 static void swrap_thread_prepare(void)
7755 {
7756         /*
7757          * This function should only be called here!!
7758          *
7759          * We bind all symobls to avoid deadlocks of the fork is
7760          * interrupted by a signal handler using a symbol of this
7761          * library.
7762          */
7763         swrap_bind_symbol_all();
7764
7765         SWRAP_LOCK_ALL;
7766 }
7767
7768 static void swrap_thread_parent(void)
7769 {
7770         SWRAP_UNLOCK_ALL;
7771 }
7772
7773 static void swrap_thread_child(void)
7774 {
7775         SWRAP_REINIT_ALL;
7776 }
7777
7778 /****************************
7779  * CONSTRUCTOR
7780  ***************************/
7781 void swrap_constructor(void)
7782 {
7783         if (PIPE_BUF < sizeof(struct swrap_unix_scm_rights)) {
7784                 SWRAP_LOG(SWRAP_LOG_ERROR,
7785                           "PIPE_BUF=%zu < "
7786                           "sizeof(struct swrap_unix_scm_rights)=%zu\n"
7787                           "sizeof(struct swrap_unix_scm_rights_payload)=%zu "
7788                           "sizeof(struct socket_info)=%zu",
7789                           (size_t)PIPE_BUF,
7790                           sizeof(struct swrap_unix_scm_rights),
7791                           sizeof(struct swrap_unix_scm_rights_payload),
7792                           sizeof(struct socket_info));
7793                 exit(-1);
7794         }
7795
7796         SWRAP_REINIT_ALL;
7797
7798         /*
7799         * If we hold a lock and the application forks, then the child
7800         * is not able to unlock the mutex and we are in a deadlock.
7801         * This should prevent such deadlocks.
7802         */
7803         pthread_atfork(&swrap_thread_prepare,
7804                        &swrap_thread_parent,
7805                        &swrap_thread_child);
7806 }
7807
7808 /****************************
7809  * DESTRUCTOR
7810  ***************************/
7811
7812 /*
7813  * This function is called when the library is unloaded and makes sure that
7814  * sockets get closed and the unix file for the socket are unlinked.
7815  */
7816 void swrap_destructor(void)
7817 {
7818         size_t i;
7819
7820         if (socket_fds_idx != NULL) {
7821                 for (i = 0; i < socket_fds_max; ++i) {
7822                         if (socket_fds_idx[i] != -1) {
7823                                 swrap_close(i);
7824                         }
7825                 }
7826                 SAFE_FREE(socket_fds_idx);
7827         }
7828
7829         SAFE_FREE(sockets);
7830
7831         if (swrap.libc.handle != NULL) {
7832                 dlclose(swrap.libc.handle);
7833         }
7834         if (swrap.libc.socket_handle) {
7835                 dlclose(swrap.libc.socket_handle);
7836         }
7837 }
7838
#if defined(HAVE__SOCKET) && defined(HAVE__CLOSE)
/*
 * On FreeBSD 12 (and maybe other platforms) system libraries like
 * libresolv prefix their syscalls with '_' in order to always use
 * the symbols from libc.
 *
 * For the interaction with resolv_wrapper we need to inject
 * socket_wrapper into libresolv, which means we also have to provide
 * all socket related syscalls with the '_' prefix.
 *
 * This is tested in Samba's 'make test'. There we noticed that
 * providing '_read' and '_open' would cause errors, so '_read',
 * '_write' and all non socket related calls are skipped without the
 * problem having been analyzed further.
 */

/* Declare alias_sym as an alias of real_sym with the same type. */
#define SWRAP_SYMBOL_ALIAS(real_sym, alias_sym) \
        extern typeof(real_sym) alias_sym __attribute__ ((alias(#real_sym)))

#ifdef HAVE_ACCEPT4
SWRAP_SYMBOL_ALIAS(accept4, _accept4);
#endif
SWRAP_SYMBOL_ALIAS(accept, _accept);
SWRAP_SYMBOL_ALIAS(bind, _bind);
SWRAP_SYMBOL_ALIAS(close, _close);
SWRAP_SYMBOL_ALIAS(connect, _connect);
SWRAP_SYMBOL_ALIAS(dup, _dup);
SWRAP_SYMBOL_ALIAS(dup2, _dup2);
SWRAP_SYMBOL_ALIAS(fcntl, _fcntl);
SWRAP_SYMBOL_ALIAS(getpeername, _getpeername);
SWRAP_SYMBOL_ALIAS(getsockname, _getsockname);
SWRAP_SYMBOL_ALIAS(getsockopt, _getsockopt);
SWRAP_SYMBOL_ALIAS(ioctl, _ioctl);
SWRAP_SYMBOL_ALIAS(listen, _listen);
SWRAP_SYMBOL_ALIAS(readv, _readv);
SWRAP_SYMBOL_ALIAS(recv, _recv);
SWRAP_SYMBOL_ALIAS(recvfrom, _recvfrom);
SWRAP_SYMBOL_ALIAS(recvmsg, _recvmsg);
SWRAP_SYMBOL_ALIAS(send, _send);
SWRAP_SYMBOL_ALIAS(sendmsg, _sendmsg);
SWRAP_SYMBOL_ALIAS(sendto, _sendto);
SWRAP_SYMBOL_ALIAS(setsockopt, _setsockopt);
SWRAP_SYMBOL_ALIAS(socket, _socket);
SWRAP_SYMBOL_ALIAS(socketpair, _socketpair);
SWRAP_SYMBOL_ALIAS(writev, _writev);

#endif /* HAVE__SOCKET && HAVE__CLOSE */