14bd97a2d343c033bb8460fde6f68e35e4908c39
[socket_wrapper.git] / src / socket_wrapper.c
1 /*
2  * BSD 3-Clause License
3  *
4  * Copyright (c) 2005-2008, Jelmer Vernooij <jelmer@samba.org>
5  * Copyright (c) 2006-2021, Stefan Metzmacher <metze@samba.org>
6  * Copyright (c) 2013-2021, Andreas Schneider <asn@samba.org>
7  * Copyright (c) 2014-2017, Michael Adam <obnox@samba.org>
8  * Copyright (c) 2016-2018, Anoop C S <anoopcs@redhat.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * 3. Neither the name of the author nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38
39 /*
40    Socket wrapper library. Passes all socket communication over
41    unix domain sockets if the environment variable SOCKET_WRAPPER_DIR
42    is set.
43 */
44
45 #include "config.h"
46
47 #include <sys/types.h>
48 #include <sys/time.h>
49 #include <sys/stat.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #ifdef HAVE_SYS_FILIO_H
53 #include <sys/filio.h>
54 #endif
55 #ifdef HAVE_SYS_SIGNALFD_H
56 #include <sys/signalfd.h>
57 #endif
58 #ifdef HAVE_SYS_EVENTFD_H
59 #include <sys/eventfd.h>
60 #endif
61 #ifdef HAVE_SYS_TIMERFD_H
62 #include <sys/timerfd.h>
63 #endif
64 #include <sys/uio.h>
65 #include <errno.h>
66 #include <sys/un.h>
67 #include <netinet/in.h>
68 #include <netinet/tcp.h>
69 #ifdef HAVE_NETINET_TCP_FSM_H
70 #include <netinet/tcp_fsm.h>
71 #endif
72 #include <arpa/inet.h>
73 #include <fcntl.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <stdio.h>
77 #include <stdint.h>
78 #include <stdarg.h>
79 #include <stdbool.h>
80 #include <unistd.h>
81 #ifdef HAVE_GNU_LIB_NAMES_H
82 #include <gnu/lib-names.h>
83 #endif
84 #ifdef HAVE_RPC_RPC_H
85 #include <rpc/rpc.h>
86 #endif
87 #include <pthread.h>
88
89 #include "socket_wrapper.h"
90
91 enum swrap_dbglvl_e {
92         SWRAP_LOG_ERROR = 0,
93         SWRAP_LOG_WARN,
94         SWRAP_LOG_DEBUG,
95         SWRAP_LOG_TRACE
96 };
97
98 /* GCC has a printf-type format attribute check. */
99 #ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
100 #define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b)))
101 #else
102 #define PRINTF_ATTRIBUTE(a,b)
103 #endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */
104
105 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
106 #define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor))
107 #else
108 #define CONSTRUCTOR_ATTRIBUTE
109 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
110
111 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
112 #define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor))
113 #else
114 #define DESTRUCTOR_ATTRIBUTE
115 #endif
116
117 #ifndef FALL_THROUGH
118 # ifdef HAVE_FALLTHROUGH_ATTRIBUTE
119 #  define FALL_THROUGH __attribute__ ((fallthrough))
120 # else /* HAVE_FALLTHROUGH_ATTRIBUTE */
121 #  define FALL_THROUGH ((void)0)
122 # endif /* HAVE_FALLTHROUGH_ATTRIBUTE */
123 #endif /* FALL_THROUGH */
124
125 #ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
126 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address))
127 #else
128 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
129 #endif
130
131 #ifdef HAVE_GCC_THREAD_LOCAL_STORAGE
132 # define SWRAP_THREAD __thread
133 #else
134 # define SWRAP_THREAD
135 #endif
136
137 #ifndef MIN
138 #define MIN(a,b) ((a)<(b)?(a):(b))
139 #endif
140
141 #ifndef ZERO_STRUCT
142 #define ZERO_STRUCT(x) memset((char *)&(x), 0, sizeof(x))
143 #endif
144
145 #ifndef ZERO_STRUCTP
146 #define ZERO_STRUCTP(x) do { \
147                 if ((x) != NULL) \
148                         memset((char *)(x), 0, sizeof(*(x))); \
149         } while(0)
150 #endif
151
152 #ifndef SAFE_FREE
153 #define SAFE_FREE(x) do { if ((x) != NULL) {free(x); (x)=NULL;} } while(0)
154 #endif
155
156 #ifndef discard_const
157 #define discard_const(ptr) ((void *)((uintptr_t)(ptr)))
158 #endif
159
160 #ifndef discard_const_p
161 #define discard_const_p(type, ptr) ((type *)discard_const(ptr))
162 #endif
163
164 #define UNUSED(x) (void)(x)
165
166 #ifdef IPV6_PKTINFO
167 # ifndef IPV6_RECVPKTINFO
168 #  define IPV6_RECVPKTINFO IPV6_PKTINFO
169 # endif /* IPV6_RECVPKTINFO */
170 #endif /* IPV6_PKTINFO */
171
172 /*
173  * On BSD IP_PKTINFO has a different name because during
174  * the time when they implemented it, there was no RFC.
175  * The name for IPv6 is the same as on Linux.
176  */
177 #ifndef IP_PKTINFO
178 # ifdef IP_RECVDSTADDR
179 #  define IP_PKTINFO IP_RECVDSTADDR
180 # endif
181 #endif
182
183 #define socket_wrapper_init_mutex(m) \
184         _socket_wrapper_init_mutex(m, #m)
185
186 /* Add new global locks here please */
187 # define SWRAP_REINIT_ALL do { \
188         int ret; \
189         ret = socket_wrapper_init_mutex(&sockets_mutex); \
190         if (ret != 0) exit(-1); \
191         ret = socket_wrapper_init_mutex(&socket_reset_mutex); \
192         if (ret != 0) exit(-1); \
193         ret = socket_wrapper_init_mutex(&first_free_mutex); \
194         if (ret != 0) exit(-1); \
195         ret = socket_wrapper_init_mutex(&sockets_si_global); \
196         if (ret != 0) exit(-1); \
197         ret = socket_wrapper_init_mutex(&autobind_start_mutex); \
198         if (ret != 0) exit(-1); \
199         ret = socket_wrapper_init_mutex(&pcap_dump_mutex); \
200         if (ret != 0) exit(-1); \
201         ret = socket_wrapper_init_mutex(&mtu_update_mutex); \
202         if (ret != 0) exit(-1); \
203 } while(0)
204
205 # define SWRAP_LOCK_ALL do { \
206         swrap_mutex_lock(&sockets_mutex); \
207         swrap_mutex_lock(&socket_reset_mutex); \
208         swrap_mutex_lock(&first_free_mutex); \
209         swrap_mutex_lock(&sockets_si_global); \
210         swrap_mutex_lock(&autobind_start_mutex); \
211         swrap_mutex_lock(&pcap_dump_mutex); \
212         swrap_mutex_lock(&mtu_update_mutex); \
213 } while(0)
214
215 # define SWRAP_UNLOCK_ALL do { \
216         swrap_mutex_unlock(&mtu_update_mutex); \
217         swrap_mutex_unlock(&pcap_dump_mutex); \
218         swrap_mutex_unlock(&autobind_start_mutex); \
219         swrap_mutex_unlock(&sockets_si_global); \
220         swrap_mutex_unlock(&first_free_mutex); \
221         swrap_mutex_unlock(&socket_reset_mutex); \
222         swrap_mutex_unlock(&sockets_mutex); \
223 } while(0)
224
/*
 * Reinterpret a pointer as a pointer to struct socket_info_container.
 *
 * The whole expansion is parenthesized so that the result can be used
 * directly in postfix expressions, e.g. SOCKET_INFO_CONTAINER(si)->meta;
 * without the outer parentheses the cast would bind to "(si)->meta".
 */
#define SOCKET_INFO_CONTAINER(si) \
        ((struct socket_info_container *)(si))
227
228 #define SWRAP_LOCK_SI(si) do { \
229         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
230         if (sic != NULL) { \
231                 swrap_mutex_lock(&sockets_si_global); \
232         } else { \
233                 abort(); \
234         } \
235 } while(0)
236
237 #define SWRAP_UNLOCK_SI(si) do { \
238         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
239         if (sic != NULL) { \
240                 swrap_mutex_unlock(&sockets_si_global); \
241         } else { \
242                 abort(); \
243         } \
244 } while(0)
245
246 #if defined(HAVE_GETTIMEOFDAY_TZ) || defined(HAVE_GETTIMEOFDAY_TZ_VOID)
247 #define swrapGetTimeOfDay(tval) gettimeofday(tval,NULL)
248 #else
249 #define swrapGetTimeOfDay(tval) gettimeofday(tval)
250 #endif
251
252 /* we need to use a very terse format here as IRIX 6.4 silently
253    truncates names to 16 chars, so if we use a longer name then we
254    can't tell which port a packet came from with recvfrom()
255
256    with this format we have 8 chars left for the directory name
257 */
258 #define SOCKET_FORMAT "%c%02X%04X"
259 #define SOCKET_TYPE_CHAR_TCP            'T'
260 #define SOCKET_TYPE_CHAR_UDP            'U'
261 #define SOCKET_TYPE_CHAR_TCP_V6         'X'
262 #define SOCKET_TYPE_CHAR_UDP_V6         'Y'
263
264 /*
265  * Set the packet MTU to 1500 bytes for stream sockets to make it easier to
266  * format PCAP capture files (as the caller will simply continue from here).
267  */
268 #define SOCKET_WRAPPER_MTU_DEFAULT 1500
269 #define SOCKET_WRAPPER_MTU_MIN     512
270 #define SOCKET_WRAPPER_MTU_MAX     32768
271
272 #define SOCKET_MAX_SOCKETS 1024
273
274 /*
275  * Maximum number of socket_info structures that can
276  * be used. Can be overridden by the environment variable
277  * SOCKET_WRAPPER_MAX_SOCKETS.
278  */
279 #define SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT 65535
280
281 #define SOCKET_WRAPPER_MAX_SOCKETS_LIMIT 262140
282
283 /* This limit is to avoid broadcast sendto() needing to stat too many
284  * files.  It may be raised (with a performance cost) to up to 254
285  * without changing the format above */
286 #define MAX_WRAPPED_INTERFACES 64
287
288 struct swrap_address {
289         socklen_t sa_socklen;
290         union {
291                 struct sockaddr s;
292                 struct sockaddr_in in;
293 #ifdef HAVE_IPV6
294                 struct sockaddr_in6 in6;
295 #endif
296                 struct sockaddr_un un;
297                 struct sockaddr_storage ss;
298         } sa;
299 };
300
301 static int first_free;
302
303 struct socket_info
304 {
305         /*
306          * Remember to update swrap_unix_scm_right_magic
307          * on any change.
308          */
309
310         int family;
311         int type;
312         int protocol;
313         int bound;
314         int bcast;
315         int is_server;
316         int connected;
317         int defer_connect;
318         int pktinfo;
319         int tcp_nodelay;
320         int listening;
321         int fd_passed;
322
323         /* The unix path so we can unlink it on close() */
324         struct sockaddr_un un_addr;
325
326         struct swrap_address bindname;
327         struct swrap_address myname;
328         struct swrap_address peername;
329
330         struct {
331                 unsigned long pck_snd;
332                 unsigned long pck_rcv;
333         } io;
334 };
335
336 struct socket_info_meta
337 {
338         unsigned int refcount;
339         int next_free;
340         /*
341          * As long as we don't use shared memory
342          * for the sockets array, we use
343          * sockets_si_global as a single mutex.
344          *
345          * pthread_mutex_t mutex;
346          */
347 };
348
349 struct socket_info_container
350 {
351         struct socket_info info;
352         struct socket_info_meta meta;
353 };
354
355 static struct socket_info_container *sockets;
356
357 static size_t socket_info_max = 0;
358
359 /*
360  * Allocate the socket array always on the limit value. We want it to be
361  * at least bigger than the default so if we reach the limit we can
362  * still deal with duplicate fds pointing to the same socket_info.
363  */
364 static size_t socket_fds_max = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
365
366 /* Hash table to map fds to corresponding socket_info index */
367 static int *socket_fds_idx;
368
369 /* Mutex for synchronizing port selection during swrap_auto_bind() */
370 static pthread_mutex_t autobind_start_mutex = PTHREAD_MUTEX_INITIALIZER;
371
372 /* Mutex to guard the initialization of array of socket_info structures */
373 static pthread_mutex_t sockets_mutex = PTHREAD_MUTEX_INITIALIZER;
374
375 /* Mutex to guard the socket reset in swrap_remove_wrapper() */
376 static pthread_mutex_t socket_reset_mutex = PTHREAD_MUTEX_INITIALIZER;
377
378 /* Mutex to synchronize access to first free index in socket_info array */
379 static pthread_mutex_t first_free_mutex = PTHREAD_MUTEX_INITIALIZER;
380
381 /*
382  * Mutex to synchronize access to socket_info structures.
383  * We use a single global mutex in order to avoid leaking
384  * ~ 38M copy on write memory per fork.
385  * max_sockets=65535 * sizeof(struct socket_info_container)=592 = 38796720
386  */
387 static pthread_mutex_t sockets_si_global = PTHREAD_MUTEX_INITIALIZER;
388
389 /* Mutex to synchronize access to packet capture dump file */
390 static pthread_mutex_t pcap_dump_mutex = PTHREAD_MUTEX_INITIALIZER;
391
392 /* Mutex for synchronizing MTU value fetch */
393 static pthread_mutex_t mtu_update_mutex = PTHREAD_MUTEX_INITIALIZER;
394
395 /* Function prototypes */
396
397 #if ! defined(HAVE_CONSTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_INIT)
398 /* xlC and other oldschool compilers support (only) this */
399 #pragma init (swrap_constructor)
400 #endif
401 void swrap_constructor(void) CONSTRUCTOR_ATTRIBUTE;
402 #if ! defined(HAVE_DESTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_FINI)
403 #pragma fini (swrap_destructor)
404 #endif
405 void swrap_destructor(void) DESTRUCTOR_ATTRIBUTE;
406
#ifndef HAVE_GETPROGNAME
/*
 * Replacement for getprogname() on platforms that do not provide it.
 *
 * Yields program_invocation_short_name or the result of getexecname(),
 * whichever was detected at configure time, and NULL if neither exists.
 */
static const char *getprogname(void)
{
        const char *name = NULL;

#if defined(HAVE_PROGRAM_INVOCATION_SHORT_NAME)
        name = program_invocation_short_name;
#elif defined(HAVE_GETEXECNAME)
        name = getexecname();
#endif /* HAVE_PROGRAM_INVOCATION_SHORT_NAME */

        return name;
}
#endif /* HAVE_GETPROGNAME */
419
420 static void swrap_log(enum swrap_dbglvl_e dbglvl, const char *func, const char *format, ...) PRINTF_ATTRIBUTE(3, 4);
421 # define SWRAP_LOG(dbglvl, ...) swrap_log((dbglvl), __func__, __VA_ARGS__)
422
423 static void swrap_log(enum swrap_dbglvl_e dbglvl,
424                       const char *func,
425                       const char *format, ...)
426 {
427         char buffer[1024];
428         va_list va;
429         const char *d;
430         unsigned int lvl = 0;
431         const char *prefix = "SWRAP";
432         const char *progname = getprogname();
433
434         d = getenv("SOCKET_WRAPPER_DEBUGLEVEL");
435         if (d != NULL) {
436                 lvl = atoi(d);
437         }
438
439         if (lvl < dbglvl) {
440                 return;
441         }
442
443         va_start(va, format);
444         vsnprintf(buffer, sizeof(buffer), format, va);
445         va_end(va);
446
447         switch (dbglvl) {
448                 case SWRAP_LOG_ERROR:
449                         prefix = "SWRAP_ERROR";
450                         break;
451                 case SWRAP_LOG_WARN:
452                         prefix = "SWRAP_WARN";
453                         break;
454                 case SWRAP_LOG_DEBUG:
455                         prefix = "SWRAP_DEBUG";
456                         break;
457                 case SWRAP_LOG_TRACE:
458                         prefix = "SWRAP_TRACE";
459                         break;
460         }
461
462         if (progname == NULL) {
463                 progname = "<unknown>";
464         }
465
466         fprintf(stderr,
467                 "%s[%s (%u)] - %s: %s\n",
468                 prefix,
469                 progname,
470                 (unsigned int)getpid(),
471                 func,
472                 buffer);
473 }
474
475 /*********************************************************
476  * SWRAP LOADING LIBC FUNCTIONS
477  *********************************************************/
478
479 #include <dlfcn.h>
480
481 #ifdef HAVE_ACCEPT4
482 typedef int (*__libc_accept4)(int sockfd,
483                               struct sockaddr *addr,
484                               socklen_t *addrlen,
485                               int flags);
486 #else
487 typedef int (*__libc_accept)(int sockfd,
488                              struct sockaddr *addr,
489                              socklen_t *addrlen);
490 #endif
491 typedef int (*__libc_bind)(int sockfd,
492                            const struct sockaddr *addr,
493                            socklen_t addrlen);
494 typedef int (*__libc_close)(int fd);
495 #ifdef HAVE___CLOSE_NOCANCEL
496 typedef int (*__libc___close_nocancel)(int fd);
497 #endif
498 typedef int (*__libc_connect)(int sockfd,
499                               const struct sockaddr *addr,
500                               socklen_t addrlen);
501 typedef int (*__libc_dup)(int fd);
502 typedef int (*__libc_dup2)(int oldfd, int newfd);
503 typedef int (*__libc_fcntl)(int fd, int cmd, ...);
504 typedef FILE *(*__libc_fopen)(const char *name, const char *mode);
505 #ifdef HAVE_FOPEN64
506 typedef FILE *(*__libc_fopen64)(const char *name, const char *mode);
507 #endif
508 #ifdef HAVE_EVENTFD
509 typedef int (*__libc_eventfd)(int count, int flags);
510 #endif
511 typedef int (*__libc_getpeername)(int sockfd,
512                                   struct sockaddr *addr,
513                                   socklen_t *addrlen);
514 typedef int (*__libc_getsockname)(int sockfd,
515                                   struct sockaddr *addr,
516                                   socklen_t *addrlen);
517 typedef int (*__libc_getsockopt)(int sockfd,
518                                int level,
519                                int optname,
520                                void *optval,
521                                socklen_t *optlen);
522 typedef int (*__libc_ioctl)(int d, unsigned long int request, ...);
523 typedef int (*__libc_listen)(int sockfd, int backlog);
524 typedef int (*__libc_open)(const char *pathname, int flags, ...);
525 #ifdef HAVE_OPEN64
526 typedef int (*__libc_open64)(const char *pathname, int flags, ...);
527 #endif /* HAVE_OPEN64 */
528 typedef int (*__libc_openat)(int dirfd, const char *path, int flags, ...);
529 typedef int (*__libc_pipe)(int pipefd[2]);
530 typedef int (*__libc_read)(int fd, void *buf, size_t count);
531 typedef ssize_t (*__libc_readv)(int fd, const struct iovec *iov, int iovcnt);
532 typedef int (*__libc_recv)(int sockfd, void *buf, size_t len, int flags);
533 typedef int (*__libc_recvfrom)(int sockfd,
534                              void *buf,
535                              size_t len,
536                              int flags,
537                              struct sockaddr *src_addr,
538                              socklen_t *addrlen);
539 typedef int (*__libc_recvmsg)(int sockfd, const struct msghdr *msg, int flags);
540 typedef int (*__libc_send)(int sockfd, const void *buf, size_t len, int flags);
541 typedef int (*__libc_sendmsg)(int sockfd, const struct msghdr *msg, int flags);
542 typedef int (*__libc_sendto)(int sockfd,
543                            const void *buf,
544                            size_t len,
545                            int flags,
546                            const  struct sockaddr *dst_addr,
547                            socklen_t addrlen);
548 typedef int (*__libc_setsockopt)(int sockfd,
549                                int level,
550                                int optname,
551                                const void *optval,
552                                socklen_t optlen);
553 #ifdef HAVE_SIGNALFD
554 typedef int (*__libc_signalfd)(int fd, const sigset_t *mask, int flags);
555 #endif
556 typedef int (*__libc_socket)(int domain, int type, int protocol);
557 typedef int (*__libc_socketpair)(int domain, int type, int protocol, int sv[2]);
558 #ifdef HAVE_TIMERFD_CREATE
559 typedef int (*__libc_timerfd_create)(int clockid, int flags);
560 #endif
561 typedef ssize_t (*__libc_write)(int fd, const void *buf, size_t count);
562 typedef ssize_t (*__libc_writev)(int fd, const struct iovec *iov, int iovcnt);
563
564 #define SWRAP_SYMBOL_ENTRY(i) \
565         union { \
566                 __libc_##i f; \
567                 void *obj; \
568         } _libc_##i
569
570 struct swrap_libc_symbols {
571 #ifdef HAVE_ACCEPT4
572         SWRAP_SYMBOL_ENTRY(accept4);
573 #else
574         SWRAP_SYMBOL_ENTRY(accept);
575 #endif
576         SWRAP_SYMBOL_ENTRY(bind);
577         SWRAP_SYMBOL_ENTRY(close);
578 #ifdef HAVE___CLOSE_NOCANCEL
579         SWRAP_SYMBOL_ENTRY(__close_nocancel);
580 #endif
581         SWRAP_SYMBOL_ENTRY(connect);
582         SWRAP_SYMBOL_ENTRY(dup);
583         SWRAP_SYMBOL_ENTRY(dup2);
584         SWRAP_SYMBOL_ENTRY(fcntl);
585         SWRAP_SYMBOL_ENTRY(fopen);
586 #ifdef HAVE_FOPEN64
587         SWRAP_SYMBOL_ENTRY(fopen64);
588 #endif
589 #ifdef HAVE_EVENTFD
590         SWRAP_SYMBOL_ENTRY(eventfd);
591 #endif
592         SWRAP_SYMBOL_ENTRY(getpeername);
593         SWRAP_SYMBOL_ENTRY(getsockname);
594         SWRAP_SYMBOL_ENTRY(getsockopt);
595         SWRAP_SYMBOL_ENTRY(ioctl);
596         SWRAP_SYMBOL_ENTRY(listen);
597         SWRAP_SYMBOL_ENTRY(open);
598 #ifdef HAVE_OPEN64
599         SWRAP_SYMBOL_ENTRY(open64);
600 #endif
601         SWRAP_SYMBOL_ENTRY(openat);
602         SWRAP_SYMBOL_ENTRY(pipe);
603         SWRAP_SYMBOL_ENTRY(read);
604         SWRAP_SYMBOL_ENTRY(readv);
605         SWRAP_SYMBOL_ENTRY(recv);
606         SWRAP_SYMBOL_ENTRY(recvfrom);
607         SWRAP_SYMBOL_ENTRY(recvmsg);
608         SWRAP_SYMBOL_ENTRY(send);
609         SWRAP_SYMBOL_ENTRY(sendmsg);
610         SWRAP_SYMBOL_ENTRY(sendto);
611         SWRAP_SYMBOL_ENTRY(setsockopt);
612 #ifdef HAVE_SIGNALFD
613         SWRAP_SYMBOL_ENTRY(signalfd);
614 #endif
615         SWRAP_SYMBOL_ENTRY(socket);
616         SWRAP_SYMBOL_ENTRY(socketpair);
617 #ifdef HAVE_TIMERFD_CREATE
618         SWRAP_SYMBOL_ENTRY(timerfd_create);
619 #endif
620         SWRAP_SYMBOL_ENTRY(write);
621         SWRAP_SYMBOL_ENTRY(writev);
622 };
623
624 struct swrap {
625         struct {
626                 void *handle;
627                 void *socket_handle;
628                 struct swrap_libc_symbols symbols;
629         } libc;
630 };
631
632 static struct swrap swrap;
633
634 /* prototypes */
635 static char *socket_wrapper_dir(void);
636
637 #define LIBC_NAME "libc.so"
638
/* Libraries from which wrapped symbols can be resolved */
enum swrap_lib {
    SWRAP_LIBC,
    SWRAP_LIBSOCKET,
};

/*
 * Translate a library identifier into a short name for log messages.
 * Values outside the enum yield "unknown".
 */
static const char *swrap_str_lib(enum swrap_lib lib)
{
        const char *name = "unknown";

        /*
         * Deliberately no default label, so the compiler can warn about
         * enum values that are not handled here.
         */
        switch (lib) {
        case SWRAP_LIBC:
                name = "libc";
                break;
        case SWRAP_LIBSOCKET:
                name = "libsocket";
                break;
        }

        return name;
}
656
657 static void *swrap_load_lib_handle(enum swrap_lib lib)
658 {
659         int flags = RTLD_LAZY;
660         void *handle = NULL;
661         int i;
662
663 #ifdef RTLD_DEEPBIND
664         const char *env_preload = getenv("LD_PRELOAD");
665         const char *env_deepbind = getenv("SOCKET_WRAPPER_DISABLE_DEEPBIND");
666         bool enable_deepbind = true;
667
668         /* Don't do a deepbind if we run with libasan */
669         if (env_preload != NULL && strlen(env_preload) < 1024) {
670                 const char *p = strstr(env_preload, "libasan.so");
671                 if (p != NULL) {
672                         enable_deepbind = false;
673                 }
674         }
675
676         if (env_deepbind != NULL && strlen(env_deepbind) >= 1) {
677                 enable_deepbind = false;
678         }
679
680         if (enable_deepbind) {
681                 flags |= RTLD_DEEPBIND;
682         }
683 #endif
684
685         switch (lib) {
686         case SWRAP_LIBSOCKET:
687 #ifdef HAVE_LIBSOCKET
688                 handle = swrap.libc.socket_handle;
689                 if (handle == NULL) {
690                         for (i = 10; i >= 0; i--) {
691                                 char soname[256] = {0};
692
693                                 snprintf(soname, sizeof(soname), "libsocket.so.%d", i);
694                                 handle = dlopen(soname, flags);
695                                 if (handle != NULL) {
696                                         break;
697                                 }
698                         }
699
700                         swrap.libc.socket_handle = handle;
701                 }
702                 break;
703 #endif
704         case SWRAP_LIBC:
705                 handle = swrap.libc.handle;
706 #ifdef LIBC_SO
707                 if (handle == NULL) {
708                         handle = dlopen(LIBC_SO, flags);
709
710                         swrap.libc.handle = handle;
711                 }
712 #endif
713                 if (handle == NULL) {
714                         for (i = 10; i >= 0; i--) {
715                                 char soname[256] = {0};
716
717                                 snprintf(soname, sizeof(soname), "libc.so.%d", i);
718                                 handle = dlopen(soname, flags);
719                                 if (handle != NULL) {
720                                         break;
721                                 }
722                         }
723
724                         swrap.libc.handle = handle;
725                 }
726                 break;
727         }
728
729         if (handle == NULL) {
730 #ifdef RTLD_NEXT
731                 handle = swrap.libc.handle = swrap.libc.socket_handle = RTLD_NEXT;
732 #else
733                 SWRAP_LOG(SWRAP_LOG_ERROR,
734                           "Failed to dlopen library: %s",
735                           dlerror());
736                 exit(-1);
737 #endif
738         }
739
740         return handle;
741 }
742
743 static void *_swrap_bind_symbol(enum swrap_lib lib, const char *fn_name)
744 {
745         void *handle;
746         void *func;
747
748         handle = swrap_load_lib_handle(lib);
749
750         func = dlsym(handle, fn_name);
751         if (func == NULL) {
752                 SWRAP_LOG(SWRAP_LOG_ERROR,
753                           "Failed to find %s: %s",
754                           fn_name,
755                           dlerror());
756                 exit(-1);
757         }
758
759         SWRAP_LOG(SWRAP_LOG_TRACE,
760                   "Loaded %s from %s",
761                   fn_name,
762                   swrap_str_lib(lib));
763
764         return func;
765 }
766
767 #define swrap_mutex_lock(m) _swrap_mutex_lock(m, #m, __func__, __LINE__)
768 static void _swrap_mutex_lock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
769 {
770         int ret;
771
772         ret = pthread_mutex_lock(mutex);
773         if (ret != 0) {
774                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't lock pthread mutex(%s) - %s",
775                           getpid(), getppid(), caller, line, name, strerror(ret));
776                 abort();
777         }
778 }
779
780 #define swrap_mutex_unlock(m) _swrap_mutex_unlock(m, #m, __func__, __LINE__)
781 static void _swrap_mutex_unlock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
782 {
783         int ret;
784
785         ret = pthread_mutex_unlock(mutex);
786         if (ret != 0) {
787                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't unlock pthread mutex(%s) - %s",
788                           getpid(), getppid(), caller, line, name, strerror(ret));
789                 abort();
790         }
791 }
792
793 /*
794  * These macros have a thread race condition on purpose!
795  *
796  * This is an optimization to avoid locking each time we check if the symbol is
797  * bound.
798  */
/*
 * Resolve sym_name in the given library via _swrap_bind_symbol() and store
 * the address in the matching swrap.libc.symbols entry.
 *
 * The expansion intentionally does not end in a semicolon: the caller
 * writes it, which keeps the macro usable as a single statement (for
 * example in an unbraced if/else).
 */
#define _swrap_bind_symbol_generic(lib, sym_name) do { \
        swrap.libc.symbols._libc_##sym_name.obj = \
                _swrap_bind_symbol(lib, #sym_name); \
} while(0)
803
804 #define swrap_bind_symbol_libc(sym_name) \
805         _swrap_bind_symbol_generic(SWRAP_LIBC, sym_name)
806
807 #define swrap_bind_symbol_libsocket(sym_name) \
808         _swrap_bind_symbol_generic(SWRAP_LIBSOCKET, sym_name)
809
810 static void swrap_bind_symbol_all(void);
811
812 /****************************************************************************
813  *                               IMPORTANT
814  ****************************************************************************
815  *
816  * Functions especially from libc need to be loaded individually, you can't
817  * load all at once or gdb will segfault at startup. The same applies to
818  * valgrind and probably has something to do with the linker.  So we need to
819  * load each function at the point it is called the first time.
820  *
821  ****************************************************************************/
822
823 #ifdef HAVE_ACCEPT4
824 static int libc_accept4(int sockfd,
825                         struct sockaddr *addr,
826                         socklen_t *addrlen,
827                         int flags)
828 {
829         swrap_bind_symbol_all();
830
831         return swrap.libc.symbols._libc_accept4.f(sockfd, addr, addrlen, flags);
832 }
833
834 #else /* HAVE_ACCEPT4 */
835
836 static int libc_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
837 {
838         swrap_bind_symbol_all();
839
840         return swrap.libc.symbols._libc_accept.f(sockfd, addr, addrlen);
841 }
842 #endif /* HAVE_ACCEPT4 */
843
844 static int libc_bind(int sockfd,
845                      const struct sockaddr *addr,
846                      socklen_t addrlen)
847 {
848         swrap_bind_symbol_all();
849
850         return swrap.libc.symbols._libc_bind.f(sockfd, addr, addrlen);
851 }
852
853 static int libc_close(int fd)
854 {
855         swrap_bind_symbol_all();
856
857         return swrap.libc.symbols._libc_close.f(fd);
858 }
859
#ifdef HAVE___CLOSE_NOCANCEL
/*
 * Call swrap_bind_symbol_all() and forward to the __close_nocancel()
 * pointer stored in swrap.libc.symbols.
 */
static int libc___close_nocancel(int fd)
{
        int rc;

        swrap_bind_symbol_all();

        rc = swrap.libc.symbols._libc___close_nocancel.f(fd);

        return rc;
}
#endif /* HAVE___CLOSE_NOCANCEL */
868
869 static int libc_connect(int sockfd,
870                         const struct sockaddr *addr,
871                         socklen_t addrlen)
872 {
873         swrap_bind_symbol_all();
874
875         return swrap.libc.symbols._libc_connect.f(sockfd, addr, addrlen);
876 }
877
878 static int libc_dup(int fd)
879 {
880         swrap_bind_symbol_all();
881
882         return swrap.libc.symbols._libc_dup.f(fd);
883 }
884
885 static int libc_dup2(int oldfd, int newfd)
886 {
887         swrap_bind_symbol_all();
888
889         return swrap.libc.symbols._libc_dup2.f(oldfd, newfd);
890 }
891
#ifdef HAVE_EVENTFD
/*
 * Call swrap_bind_symbol_all() and forward to the eventfd() pointer stored
 * in swrap.libc.symbols.
 */
static int libc_eventfd(int count, int flags)
{
        int fd;

        swrap_bind_symbol_all();

        fd = swrap.libc.symbols._libc_eventfd.f(count, flags);

        return fd;
}
#endif
900
901 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
902 static int libc_vfcntl(int fd, int cmd, va_list ap)
903 {
904         void *arg;
905         int rc;
906
907         swrap_bind_symbol_all();
908
909         arg = va_arg(ap, void *);
910
911         rc = swrap.libc.symbols._libc_fcntl.f(fd, cmd, arg);
912
913         return rc;
914 }
915
916 static int libc_getpeername(int sockfd,
917                             struct sockaddr *addr,
918                             socklen_t *addrlen)
919 {
920         swrap_bind_symbol_all();
921
922         return swrap.libc.symbols._libc_getpeername.f(sockfd, addr, addrlen);
923 }
924
925 static int libc_getsockname(int sockfd,
926                             struct sockaddr *addr,
927                             socklen_t *addrlen)
928 {
929         swrap_bind_symbol_all();
930
931         return swrap.libc.symbols._libc_getsockname.f(sockfd, addr, addrlen);
932 }
933
934 static int libc_getsockopt(int sockfd,
935                            int level,
936                            int optname,
937                            void *optval,
938                            socklen_t *optlen)
939 {
940         swrap_bind_symbol_all();
941
942         return swrap.libc.symbols._libc_getsockopt.f(sockfd,
943                                                      level,
944                                                      optname,
945                                                      optval,
946                                                      optlen);
947 }
948
949 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
950 static int libc_vioctl(int d, unsigned long int request, va_list ap)
951 {
952         void *arg;
953         int rc;
954
955         swrap_bind_symbol_all();
956
957         arg = va_arg(ap, void *);
958
959         rc = swrap.libc.symbols._libc_ioctl.f(d, request, arg);
960
961         return rc;
962 }
963
964 static int libc_listen(int sockfd, int backlog)
965 {
966         swrap_bind_symbol_all();
967
968         return swrap.libc.symbols._libc_listen.f(sockfd, backlog);
969 }
970
971 static FILE *libc_fopen(const char *name, const char *mode)
972 {
973         swrap_bind_symbol_all();
974
975         return swrap.libc.symbols._libc_fopen.f(name, mode);
976 }
977
978 #ifdef HAVE_FOPEN64
979 static FILE *libc_fopen64(const char *name, const char *mode)
980 {
981         swrap_bind_symbol_all();
982
983         return swrap.libc.symbols._libc_fopen64.f(name, mode);
984 }
985 #endif /* HAVE_FOPEN64 */
986
/*
 * Add O_LARGEFILE to *flags when SIZE_MAX is 0xffffffff and O_LARGEFILE
 * is defined.  If O_PATH is defined and set in *flags, the flags are left
 * untouched.  In every other build configuration this is a no-op.
 */
static void swrap_inject_o_largefile(int *flags)
{
        (void)*flags; /* maybe unused */
#if SIZE_MAX == 0xffffffffUL && defined(O_LARGEFILE)
#ifdef O_PATH
        if ((*flags) & O_PATH) {
                return;
        }
#endif
        *flags |= O_LARGEFILE;
#endif
}
999
1000 static int libc_vopen(const char *pathname, int flags, va_list ap)
1001 {
1002         int mode = 0;
1003         int fd;
1004
1005         swrap_bind_symbol_all();
1006
1007         swrap_inject_o_largefile(&flags);
1008
1009         if (flags & O_CREAT) {
1010                 mode = va_arg(ap, int);
1011         }
1012         fd = swrap.libc.symbols._libc_open.f(pathname, flags, (mode_t)mode);
1013
1014         return fd;
1015 }
1016
/*
 * Variadic front end for libc_vopen(): packs the optional arguments into
 * a va_list, delegates, and returns the descriptor libc_vopen() produced.
 */
static int libc_open(const char *pathname, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = libc_vopen(pathname, flags, args);
        va_end(args);

        return result;
}
1028
1029 #ifdef HAVE_OPEN64
1030 static int libc_vopen64(const char *pathname, int flags, va_list ap)
1031 {
1032         int mode = 0;
1033         int fd;
1034
1035         swrap_bind_symbol_all();
1036
1037         swrap_inject_o_largefile(&flags);
1038
1039         if (flags & O_CREAT) {
1040                 mode = va_arg(ap, int);
1041         }
1042         fd = swrap.libc.symbols._libc_open64.f(pathname, flags, (mode_t)mode);
1043
1044         return fd;
1045 }
1046 #endif /* HAVE_OPEN64 */
1047
1048 static int libc_vopenat(int dirfd, const char *path, int flags, va_list ap)
1049 {
1050         int mode = 0;
1051         int fd;
1052
1053         swrap_bind_symbol_all();
1054
1055         swrap_inject_o_largefile(&flags);
1056
1057         if (flags & O_CREAT) {
1058                 mode = va_arg(ap, int);
1059         }
1060         fd = swrap.libc.symbols._libc_openat.f(dirfd,
1061                                                path,
1062                                                flags,
1063                                                (mode_t)mode);
1064
1065         return fd;
1066 }
1067
1068 #if 0
/*
 * Variadic front end for libc_vopenat().  Currently compiled out by the
 * surrounding "#if 0".
 */
static int libc_openat(int dirfd, const char *path, int flags, ...)
{
        va_list ap;
        int fd;

        /* Pack the optional arguments and let libc_vopenat() decode them. */
        va_start(ap, flags);
        fd = libc_vopenat(dirfd, path, flags, ap);
        va_end(ap);

        return fd;
}
1080 #endif
1081
1082 static int libc_pipe(int pipefd[2])
1083 {
1084         swrap_bind_symbol_all();
1085
1086         return swrap.libc.symbols._libc_pipe.f(pipefd);
1087 }
1088
1089 static int libc_read(int fd, void *buf, size_t count)
1090 {
1091         swrap_bind_symbol_all();
1092
1093         return swrap.libc.symbols._libc_read.f(fd, buf, count);
1094 }
1095
1096 static ssize_t libc_readv(int fd, const struct iovec *iov, int iovcnt)
1097 {
1098         swrap_bind_symbol_all();
1099
1100         return swrap.libc.symbols._libc_readv.f(fd, iov, iovcnt);
1101 }
1102
1103 static int libc_recv(int sockfd, void *buf, size_t len, int flags)
1104 {
1105         swrap_bind_symbol_all();
1106
1107         return swrap.libc.symbols._libc_recv.f(sockfd, buf, len, flags);
1108 }
1109
1110 static int libc_recvfrom(int sockfd,
1111                          void *buf,
1112                          size_t len,
1113                          int flags,
1114                          struct sockaddr *src_addr,
1115                          socklen_t *addrlen)
1116 {
1117         swrap_bind_symbol_all();
1118
1119         return swrap.libc.symbols._libc_recvfrom.f(sockfd,
1120                                                    buf,
1121                                                    len,
1122                                                    flags,
1123                                                    src_addr,
1124                                                    addrlen);
1125 }
1126
1127 static int libc_recvmsg(int sockfd, struct msghdr *msg, int flags)
1128 {
1129         swrap_bind_symbol_all();
1130
1131         return swrap.libc.symbols._libc_recvmsg.f(sockfd, msg, flags);
1132 }
1133
1134 static int libc_send(int sockfd, const void *buf, size_t len, int flags)
1135 {
1136         swrap_bind_symbol_all();
1137
1138         return swrap.libc.symbols._libc_send.f(sockfd, buf, len, flags);
1139 }
1140
1141 static int libc_sendmsg(int sockfd, const struct msghdr *msg, int flags)
1142 {
1143         swrap_bind_symbol_all();
1144
1145         return swrap.libc.symbols._libc_sendmsg.f(sockfd, msg, flags);
1146 }
1147
1148 static int libc_sendto(int sockfd,
1149                        const void *buf,
1150                        size_t len,
1151                        int flags,
1152                        const  struct sockaddr *dst_addr,
1153                        socklen_t addrlen)
1154 {
1155         swrap_bind_symbol_all();
1156
1157         return swrap.libc.symbols._libc_sendto.f(sockfd,
1158                                                  buf,
1159                                                  len,
1160                                                  flags,
1161                                                  dst_addr,
1162                                                  addrlen);
1163 }
1164
1165 static int libc_setsockopt(int sockfd,
1166                            int level,
1167                            int optname,
1168                            const void *optval,
1169                            socklen_t optlen)
1170 {
1171         swrap_bind_symbol_all();
1172
1173         return swrap.libc.symbols._libc_setsockopt.f(sockfd,
1174                                                      level,
1175                                                      optname,
1176                                                      optval,
1177                                                      optlen);
1178 }
1179
1180 #ifdef HAVE_SIGNALFD
1181 static int libc_signalfd(int fd, const sigset_t *mask, int flags)
1182 {
1183         swrap_bind_symbol_all();
1184
1185         return swrap.libc.symbols._libc_signalfd.f(fd, mask, flags);
1186 }
1187 #endif
1188
1189 static int libc_socket(int domain, int type, int protocol)
1190 {
1191         swrap_bind_symbol_all();
1192
1193         return swrap.libc.symbols._libc_socket.f(domain, type, protocol);
1194 }
1195
1196 static int libc_socketpair(int domain, int type, int protocol, int sv[2])
1197 {
1198         swrap_bind_symbol_all();
1199
1200         return swrap.libc.symbols._libc_socketpair.f(domain, type, protocol, sv);
1201 }
1202
1203 #ifdef HAVE_TIMERFD_CREATE
1204 static int libc_timerfd_create(int clockid, int flags)
1205 {
1206         swrap_bind_symbol_all();
1207
1208         return swrap.libc.symbols._libc_timerfd_create.f(clockid, flags);
1209 }
1210 #endif
1211
1212 static ssize_t libc_write(int fd, const void *buf, size_t count)
1213 {
1214         swrap_bind_symbol_all();
1215
1216         return swrap.libc.symbols._libc_write.f(fd, buf, count);
1217 }
1218
1219 static ssize_t libc_writev(int fd, const struct iovec *iov, int iovcnt)
1220 {
1221         swrap_bind_symbol_all();
1222
1223         return swrap.libc.symbols._libc_writev.f(fd, iov, iovcnt);
1224 }
1225
/*
 * Bind every symbol used by the libc_*() helpers in this file.
 *
 * This is the init routine handed to pthread_once() by
 * swrap_bind_symbol_all(), so it is expected to run a single time.
 * Each symbol goes through either swrap_bind_symbol_libc() or
 * swrap_bind_symbol_libsocket(); optional symbols are guarded by the
 * matching HAVE_* macro.
 *
 * DO NOT call this function during library initialization!
 */
static void __swrap_bind_symbol_all_once(void)
{
        /* Only one of accept4/accept is bound, depending on HAVE_ACCEPT4. */
#ifdef HAVE_ACCEPT4
        swrap_bind_symbol_libsocket(accept4);
#else
        swrap_bind_symbol_libsocket(accept);
#endif
        swrap_bind_symbol_libsocket(bind);
        swrap_bind_symbol_libc(close);
#ifdef HAVE___CLOSE_NOCANCEL
        swrap_bind_symbol_libc(__close_nocancel);
#endif
        swrap_bind_symbol_libsocket(connect);
        swrap_bind_symbol_libc(dup);
        swrap_bind_symbol_libc(dup2);
        swrap_bind_symbol_libc(fcntl);
        swrap_bind_symbol_libc(fopen);
#ifdef HAVE_FOPEN64
        swrap_bind_symbol_libc(fopen64);
#endif
#ifdef HAVE_EVENTFD
        swrap_bind_symbol_libc(eventfd);
#endif
        swrap_bind_symbol_libsocket(getpeername);
        swrap_bind_symbol_libsocket(getsockname);
        swrap_bind_symbol_libsocket(getsockopt);
        swrap_bind_symbol_libc(ioctl);
        swrap_bind_symbol_libsocket(listen);
        swrap_bind_symbol_libc(open);
#ifdef HAVE_OPEN64
        swrap_bind_symbol_libc(open64);
#endif
        swrap_bind_symbol_libc(openat);
        swrap_bind_symbol_libsocket(pipe);
        swrap_bind_symbol_libc(read);
        swrap_bind_symbol_libsocket(readv);
        swrap_bind_symbol_libsocket(recv);
        swrap_bind_symbol_libsocket(recvfrom);
        swrap_bind_symbol_libsocket(recvmsg);
        swrap_bind_symbol_libsocket(send);
        swrap_bind_symbol_libsocket(sendmsg);
        swrap_bind_symbol_libsocket(sendto);
        swrap_bind_symbol_libsocket(setsockopt);
#ifdef HAVE_SIGNALFD
        swrap_bind_symbol_libsocket(signalfd);
#endif
        swrap_bind_symbol_libsocket(socket);
        swrap_bind_symbol_libsocket(socketpair);
#ifdef HAVE_TIMERFD_CREATE
        swrap_bind_symbol_libc(timerfd_create);
#endif
        swrap_bind_symbol_libc(write);
        swrap_bind_symbol_libsocket(writev);
}
1281
1282 static void swrap_bind_symbol_all(void)
1283 {
1284         static pthread_once_t all_symbol_binding_once = PTHREAD_ONCE_INIT;
1285
1286         pthread_once(&all_symbol_binding_once, __swrap_bind_symbol_all_once);
1287 }
1288
1289 /*********************************************************
1290  * SWRAP HELPER FUNCTIONS
1291  *********************************************************/
1292
1293 /*
1294  * We return 127.0.0.0 (default) or 10.53.57.0.
1295  *
1296  * This can be controlled by:
1297  * SOCKET_WRAPPER_IPV4_NETWORK=127.0.0.0 (default)
1298  * or
1299  * SOCKET_WRAPPER_IPV4_NETWORK=10.53.57.0
1300  */
1301 static in_addr_t swrap_ipv4_net(void)
1302 {
1303         static int initialized;
1304         static in_addr_t hv;
1305         const char *net_str = NULL;
1306         struct in_addr nv;
1307         int ret;
1308
1309         if (initialized) {
1310                 return hv;
1311         }
1312         initialized = 1;
1313
1314         net_str = getenv("SOCKET_WRAPPER_IPV4_NETWORK");
1315         if (net_str == NULL) {
1316                 net_str = "127.0.0.0";
1317         }
1318
1319         ret = inet_pton(AF_INET, net_str, &nv);
1320         if (ret <= 0) {
1321                 SWRAP_LOG(SWRAP_LOG_ERROR,
1322                           "INVALID IPv4 Network [%s]",
1323                           net_str);
1324                 abort();
1325         }
1326
1327         hv = ntohl(nv.s_addr);
1328
1329         switch (hv) {
1330         case 0x7f000000:
1331                 /* 127.0.0.0 */
1332                 break;
1333         case 0x0a353900:
1334                 /* 10.53.57.0 */
1335                 break;
1336         default:
1337                 SWRAP_LOG(SWRAP_LOG_ERROR,
1338                           "INVALID IPv4 Network [%s][0x%x] should be "
1339                           "127.0.0.0 or 10.53.57.0",
1340                           net_str, (unsigned)hv);
1341                 abort();
1342         }
1343
1344         return hv;
1345 }
1346
/*
 * Return the result of swrap_ipv4_net() with all IN_CLASSA_HOST bits set,
 * i.e. 127.255.255.255 or 10.255.255.255, in host byte order.
 */
static in_addr_t swrap_ipv4_bcast(void)
{
        in_addr_t net = swrap_ipv4_net();

        return net | IN_CLASSA_HOST;
}
1359
1360 /*
1361  * This returns 127.0.0.${iface} or 10.53.57.${iface}
1362  */
1363 static in_addr_t swrap_ipv4_iface(unsigned int iface)
1364 {
1365         in_addr_t hv;
1366
1367         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1368                 SWRAP_LOG(SWRAP_LOG_ERROR,
1369                           "swrap_ipv4_iface(%u) invalid!",
1370                           iface);
1371                 abort();
1372                 return -1;
1373         }
1374
1375         hv = swrap_ipv4_net();
1376         hv |= iface;
1377
1378         return hv;
1379 }
1380
1381 #ifdef HAVE_IPV6
/*
 * Return a pointer to the static IPv6 base address FD00::5357:5F00.
 *
 * The address is parsed on the first call and the same static storage is
 * returned afterwards.  The process aborts if inet_pton() rejects the
 * literal.
 */
static const struct in6_addr *swrap_ipv6(void)
{
        static struct in6_addr v;
        static int initialized;

        if (!initialized) {
                initialized = 1;

                if (inet_pton(AF_INET6, "FD00::5357:5F00", &v) <= 0) {
                        abort();
                }
        }

        return &v;
}
1403 #endif
1404
1405 static void set_port(int family, int prt, struct swrap_address *addr)
1406 {
1407         switch (family) {
1408         case AF_INET:
1409                 addr->sa.in.sin_port = htons(prt);
1410                 break;
1411 #ifdef HAVE_IPV6
1412         case AF_INET6:
1413                 addr->sa.in6.sin6_port = htons(prt);
1414                 break;
1415 #endif
1416         }
1417 }
1418
/*
 * Return the size of the sockaddr structure for family:
 * sizeof(struct sockaddr_in) for AF_INET, sizeof(struct sockaddr_in6) for
 * AF_INET6 when built with HAVE_IPV6, and 0 for everything else.
 */
static size_t socket_length(int family)
{
        if (family == AF_INET) {
                return sizeof(struct sockaddr_in);
        }
#ifdef HAVE_IPV6
        if (family == AF_INET6) {
                return sizeof(struct sockaddr_in6);
        }
#endif
        return 0;
}
1431
/* Caller-provided storage for the text built by swrap_sockaddr_string(). */
struct swrap_sockaddr_buf {
        char str[128];
};

/*
 * Format saddr as "addr[<address>]/port[<port>]" into buf->str and return
 * buf->str.
 *
 * AF_INET (and AF_INET6 when built with HAVE_IPV6) addresses are rendered
 * with inet_ntop().  For any other family the address part reads
 * "<Unknown address family N>" and the port is reported as 0.
 */
static const char *swrap_sockaddr_string(struct swrap_sockaddr_buf *buf,
                                         const struct sockaddr *saddr)
{
        char addr_text[64] = {0};
        unsigned int port_num = 0;

        if (saddr->sa_family == AF_INET) {
                const struct sockaddr_in *sin =
                    (const struct sockaddr_in *)(const void *)saddr;

                port_num = ntohs(sin->sin_port);
                inet_ntop(saddr->sa_family,
                          &sin->sin_addr,
                          addr_text, sizeof(addr_text));
#ifdef HAVE_IPV6
        } else if (saddr->sa_family == AF_INET6) {
                const struct sockaddr_in6 *sin6 =
                    (const struct sockaddr_in6 *)(const void *)saddr;

                port_num = ntohs(sin6->sin6_port);
                inet_ntop(saddr->sa_family,
                          &sin6->sin6_addr,
                          addr_text, sizeof(addr_text));
#endif
        } else {
                snprintf(addr_text, sizeof(addr_text),
                         "<Unknown address family %u>",
                         saddr->sa_family);
        }

        snprintf(buf->str, sizeof(buf->str),
                 "addr[%s]/port[%u]",
                 addr_text, port_num);

        return buf->str;
}
1480
1481 static struct socket_info *swrap_get_socket_info(int si_index)
1482 {
1483         return (struct socket_info *)(&(sockets[si_index].info));
1484 }
1485
1486 static int swrap_get_refcount(struct socket_info *si)
1487 {
1488         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1489         return sic->meta.refcount;
1490 }
1491
1492 static void swrap_inc_refcount(struct socket_info *si)
1493 {
1494         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1495
1496         sic->meta.refcount += 1;
1497 }
1498
1499 static void swrap_dec_refcount(struct socket_info *si)
1500 {
1501         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1502
1503         sic->meta.refcount -= 1;
1504 }
1505
1506 static int swrap_get_next_free(struct socket_info *si)
1507 {
1508         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1509
1510         return sic->meta.next_free;
1511 }
1512
1513 static void swrap_set_next_free(struct socket_info *si, int next_free)
1514 {
1515         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1516
1517         sic->meta.next_free = next_free;
1518 }
1519
1520 static int swrap_un_path(struct sockaddr_un *un,
1521                          const char *swrap_dir,
1522                          char type,
1523                          unsigned int iface,
1524                          unsigned int prt)
1525 {
1526         int ret;
1527
1528         ret = snprintf(un->sun_path,
1529                        sizeof(un->sun_path),
1530                        "%s/"SOCKET_FORMAT,
1531                        swrap_dir,
1532                        type,
1533                        iface,
1534                        prt);
1535         if ((size_t)ret >= sizeof(un->sun_path)) {
1536                 return ENAMETOOLONG;
1537         }
1538
1539         return 0;
1540 }
1541
/*
 * Build "<swrap_dir>/EINVAL" into un->sun_path.
 *
 * Returns 0 when the whole path fits, ENAMETOOLONG when snprintf()
 * reports that it did not fit into sun_path.
 */
static int swrap_un_path_EINVAL(struct sockaddr_un *un,
                                const char *swrap_dir)
{
        int len = snprintf(un->sun_path,
                           sizeof(un->sun_path),
                           "%s/EINVAL",
                           swrap_dir);

        return ((size_t)len >= sizeof(un->sun_path)) ? ENAMETOOLONG : 0;
}
1558
1559 static bool swrap_dir_usable(const char *swrap_dir)
1560 {
1561         struct sockaddr_un un;
1562         int ret;
1563
1564         ret = swrap_un_path(&un, swrap_dir, SOCKET_TYPE_CHAR_TCP, 0, 0);
1565         if (ret == 0) {
1566                 return true;
1567         }
1568
1569         ret = swrap_un_path_EINVAL(&un, swrap_dir);
1570         if (ret == 0) {
1571                 return true;
1572         }
1573
1574         return false;
1575 }
1576
1577 static char *socket_wrapper_dir(void)
1578 {
1579         char *swrap_dir = NULL;
1580         char *s = getenv("SOCKET_WRAPPER_DIR");
1581         char *t;
1582         bool ok;
1583
1584         if (s == NULL || s[0] == '\0') {
1585                 SWRAP_LOG(SWRAP_LOG_WARN, "SOCKET_WRAPPER_DIR not set");
1586                 return NULL;
1587         }
1588
1589         swrap_dir = realpath(s, NULL);
1590         if (swrap_dir == NULL) {
1591                 SWRAP_LOG(SWRAP_LOG_ERROR,
1592                           "Unable to resolve socket_wrapper dir path: %s - %s",
1593                           s,
1594                           strerror(errno));
1595                 abort();
1596         }
1597
1598         ok = swrap_dir_usable(swrap_dir);
1599         if (ok) {
1600                 goto done;
1601         }
1602
1603         free(swrap_dir);
1604
1605         ok = swrap_dir_usable(s);
1606         if (!ok) {
1607                 SWRAP_LOG(SWRAP_LOG_ERROR, "SOCKET_WRAPPER_DIR is too long");
1608                 abort();
1609         }
1610
1611         t = getenv("SOCKET_WRAPPER_DIR_ALLOW_ORIG");
1612         if (t == NULL) {
1613                 SWRAP_LOG(SWRAP_LOG_ERROR,
1614                           "realpath(SOCKET_WRAPPER_DIR) too long and "
1615                           "SOCKET_WRAPPER_DIR_ALLOW_ORIG not set");
1616                 abort();
1617
1618         }
1619
1620         swrap_dir = strdup(s);
1621         if (swrap_dir == NULL) {
1622                 SWRAP_LOG(SWRAP_LOG_ERROR,
1623                           "Unable to duplicate socket_wrapper dir path");
1624                 abort();
1625         }
1626
1627         SWRAP_LOG(SWRAP_LOG_WARN,
1628                   "realpath(SOCKET_WRAPPER_DIR) too long, "
1629                   "using original SOCKET_WRAPPER_DIR\n");
1630
1631 done:
1632         SWRAP_LOG(SWRAP_LOG_TRACE, "socket_wrapper_dir: %s", swrap_dir);
1633         return swrap_dir;
1634 }
1635
1636 static unsigned int socket_wrapper_mtu(void)
1637 {
1638         static unsigned int max_mtu = 0;
1639         unsigned int tmp;
1640         const char *s;
1641         char *endp;
1642
1643         swrap_mutex_lock(&mtu_update_mutex);
1644
1645         if (max_mtu != 0) {
1646                 goto done;
1647         }
1648
1649         max_mtu = SOCKET_WRAPPER_MTU_DEFAULT;
1650
1651         s = getenv("SOCKET_WRAPPER_MTU");
1652         if (s == NULL) {
1653                 goto done;
1654         }
1655
1656         tmp = strtol(s, &endp, 10);
1657         if (s == endp) {
1658                 goto done;
1659         }
1660
1661         if (tmp < SOCKET_WRAPPER_MTU_MIN || tmp > SOCKET_WRAPPER_MTU_MAX) {
1662                 goto done;
1663         }
1664         max_mtu = tmp;
1665
1666 done:
1667         swrap_mutex_unlock(&mtu_update_mutex);
1668         return max_mtu;
1669 }
1670
/*
 * Initialize *m as a PTHREAD_MUTEX_ERRORCHECK mutex.
 *
 * name is only used in the error log message.  Returns 0 on success or
 * the non-zero return value of the first pthread call that failed.
 */
static int _socket_wrapper_init_mutex(pthread_mutex_t *m, const char *name)
{
        pthread_mutexattr_t ma;
        bool need_destroy = false;
        int ret = 0;

/*
 * Run cmd, store its result in ret and jump to done (after logging) when
 * it is non-zero.  Note the macro is not #undef'ed in this function.
 */
#define __CHECK(cmd) do { \
        ret = cmd; \
        if (ret != 0) { \
                SWRAP_LOG(SWRAP_LOG_ERROR, \
                          "%s: %s - failed %d", \
                          name, #cmd, ret); \
                goto done; \
        } \
} while(0)

        *m = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
        __CHECK(pthread_mutexattr_init(&ma));
        /* From here on the attribute object must be destroyed on exit. */
        need_destroy = true;
        __CHECK(pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK));
        __CHECK(pthread_mutex_init(m, &ma));
done:
        if (need_destroy) {
                pthread_mutexattr_destroy(&ma);
        }
        return ret;
}
1698
1699 static size_t socket_wrapper_max_sockets(void)
1700 {
1701         const char *s;
1702         size_t tmp;
1703         char *endp;
1704
1705         if (socket_info_max != 0) {
1706                 return socket_info_max;
1707         }
1708
1709         socket_info_max = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1710
1711         s = getenv("SOCKET_WRAPPER_MAX_SOCKETS");
1712         if (s == NULL || s[0] == '\0') {
1713                 goto done;
1714         }
1715
1716         tmp = strtoul(s, &endp, 10);
1717         if (s == endp) {
1718                 goto done;
1719         }
1720         if (tmp == 0) {
1721                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1722                 SWRAP_LOG(SWRAP_LOG_ERROR,
1723                           "Invalid number of sockets specified, "
1724                           "using default (%zu)",
1725                           tmp);
1726         }
1727
1728         if (tmp > SOCKET_WRAPPER_MAX_SOCKETS_LIMIT) {
1729                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
1730                 SWRAP_LOG(SWRAP_LOG_ERROR,
1731                           "Invalid number of sockets specified, "
1732                           "using maximum (%zu).",
1733                           tmp);
1734         }
1735
1736         socket_info_max = tmp;
1737
1738 done:
1739         return socket_info_max;
1740 }
1741
1742 static void socket_wrapper_init_fds_idx(void)
1743 {
1744         int *tmp = NULL;
1745         size_t i;
1746
1747         if (socket_fds_idx != NULL) {
1748                 return;
1749         }
1750
1751         tmp = (int *)calloc(socket_fds_max, sizeof(int));
1752         if (tmp == NULL) {
1753                 SWRAP_LOG(SWRAP_LOG_ERROR,
1754                           "Failed to allocate socket fds index array: %s",
1755                           strerror(errno));
1756                 exit(-1);
1757         }
1758
1759         for (i = 0; i < socket_fds_max; i++) {
1760                 tmp[i] = -1;
1761         }
1762
1763         socket_fds_idx = tmp;
1764 }
1765
1766 static void socket_wrapper_init_sockets(void)
1767 {
1768         size_t max_sockets;
1769         size_t i;
1770         int ret = 0;
1771
1772         swrap_bind_symbol_all();
1773
1774         swrap_mutex_lock(&sockets_mutex);
1775
1776         if (sockets != NULL) {
1777                 swrap_mutex_unlock(&sockets_mutex);
1778                 return;
1779         }
1780
1781         SWRAP_LOG(SWRAP_LOG_DEBUG,
1782                   "SOCKET_WRAPPER_PACKAGE[%s] SOCKET_WRAPPER_VERSION[%s]",
1783                   SOCKET_WRAPPER_PACKAGE, SOCKET_WRAPPER_VERSION);
1784
1785         /*
1786          * Intialize the static cache early before
1787          * any thread is able to start.
1788          */
1789         (void)swrap_ipv4_net();
1790
1791         socket_wrapper_init_fds_idx();
1792
1793         /* Needs to be called inside the sockets_mutex lock here. */
1794         max_sockets = socket_wrapper_max_sockets();
1795
1796         sockets = (struct socket_info_container *)calloc(max_sockets,
1797                                         sizeof(struct socket_info_container));
1798
1799         if (sockets == NULL) {
1800                 SWRAP_LOG(SWRAP_LOG_ERROR,
1801                           "Failed to allocate sockets array: %s",
1802                           strerror(errno));
1803                 swrap_mutex_unlock(&sockets_mutex);
1804                 exit(-1);
1805         }
1806
1807         swrap_mutex_lock(&first_free_mutex);
1808         swrap_mutex_lock(&sockets_si_global);
1809
1810         first_free = 0;
1811
1812         for (i = 0; i < max_sockets; i++) {
1813                 swrap_set_next_free(&sockets[i].info, i+1);
1814         }
1815
1816         /* mark the end of the free list */
1817         swrap_set_next_free(&sockets[max_sockets-1].info, -1);
1818
1819         swrap_mutex_unlock(&sockets_si_global);
1820         swrap_mutex_unlock(&first_free_mutex);
1821         swrap_mutex_unlock(&sockets_mutex);
1822         if (ret != 0) {
1823                 exit(-1);
1824         }
1825 }
1826
/*
 * Report whether socket wrapping is active.
 *
 * Returns false when socket_wrapper_dir() yields NULL.  Otherwise the
 * directory string is released, socket_wrapper_init_sockets() is called
 * and true is returned.
 */
bool socket_wrapper_enabled(void)
{
        char *dir = socket_wrapper_dir();

        if (dir != NULL) {
                SAFE_FREE(dir);

                socket_wrapper_init_sockets();

                return true;
        }

        return false;
}
1841
1842 static unsigned int socket_wrapper_default_iface(void)
1843 {
1844         const char *s = getenv("SOCKET_WRAPPER_DEFAULT_IFACE");
1845         if (s) {
1846                 unsigned int iface;
1847                 if (sscanf(s, "%u", &iface) == 1) {
1848                         if (iface >= 1 && iface <= MAX_WRAPPED_INTERFACES) {
1849                                 return iface;
1850                         }
1851                 }
1852         }
1853
1854         return 1;/* 127.0.0.1 */
1855 }
1856
1857 static void set_socket_info_index(int fd, int idx)
1858 {
1859         SWRAP_LOG(SWRAP_LOG_TRACE,
1860                   "fd=%d idx=%d",
1861                   fd, idx);
1862         socket_fds_idx[fd] = idx;
1863         /* This builtin issues a full memory barrier. */
1864         __sync_synchronize();
1865 }
1866
1867 static void reset_socket_info_index(int fd)
1868 {
1869         SWRAP_LOG(SWRAP_LOG_TRACE,
1870                   "fd=%d idx=%d",
1871                   fd, -1);
1872         set_socket_info_index(fd, -1);
1873 }
1874
1875 static int find_socket_info_index(int fd)
1876 {
1877         if (fd < 0) {
1878                 return -1;
1879         }
1880
1881         if (socket_fds_idx == NULL) {
1882                 return -1;
1883         }
1884
1885         if ((size_t)fd >= socket_fds_max) {
1886                 /*
1887                  * Do not add a log here as some applications do stupid things
1888                  * like:
1889                  *
1890                  *     for (fd = 0; fd <= getdtablesize(); fd++) {
1891                  *         close(fd)
1892                  *     };
1893                  *
1894                  * This would produce millions of lines of debug messages.
1895                  */
1896 #if 0
1897                 SWRAP_LOG(SWRAP_LOG_ERROR,
1898                           "Looking for a socket info for the fd %d is over the "
1899                           "max socket index limit of %zu.",
1900                           fd,
1901                           socket_fds_max);
1902 #endif
1903                 return -1;
1904         }
1905
1906         /* This builtin issues a full memory barrier. */
1907         __sync_synchronize();
1908         return socket_fds_idx[fd];
1909 }
1910
1911 static int swrap_add_socket_info(const struct socket_info *si_input)
1912 {
1913         struct socket_info *si = NULL;
1914         int si_index = -1;
1915
1916         if (si_input == NULL) {
1917                 errno = EINVAL;
1918                 return -1;
1919         }
1920
1921         swrap_mutex_lock(&first_free_mutex);
1922         if (first_free == -1) {
1923                 errno = ENFILE;
1924                 goto out;
1925         }
1926
1927         si_index = first_free;
1928         si = swrap_get_socket_info(si_index);
1929
1930         SWRAP_LOCK_SI(si);
1931
1932         first_free = swrap_get_next_free(si);
1933         *si = *si_input;
1934         swrap_inc_refcount(si);
1935
1936         SWRAP_UNLOCK_SI(si);
1937
1938 out:
1939         swrap_mutex_unlock(&first_free_mutex);
1940
1941         return si_index;
1942 }
1943
1944 static int swrap_create_socket(struct socket_info *si, int fd)
1945 {
1946         int idx;
1947
1948         if ((size_t)fd >= socket_fds_max) {
1949                 SWRAP_LOG(SWRAP_LOG_ERROR,
1950                           "The max socket index limit of %zu has been reached, "
1951                           "trying to add %d",
1952                           socket_fds_max,
1953                           fd);
1954                 errno = EMFILE;
1955                 return -1;
1956         }
1957
1958         idx = swrap_add_socket_info(si);
1959         if (idx == -1) {
1960                 return -1;
1961         }
1962
1963         set_socket_info_index(fd, idx);
1964
1965         return idx;
1966 }
1967
1968 static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, socklen_t *len)
1969 {
1970         unsigned int iface;
1971         unsigned int prt;
1972         const char *p;
1973         char type;
1974
1975         p = strrchr(un->sun_path, '/');
1976         if (p) p++; else p = un->sun_path;
1977
1978         if (sscanf(p, SOCKET_FORMAT, &type, &iface, &prt) != 3) {
1979                 SWRAP_LOG(SWRAP_LOG_ERROR, "sun_path[%s] p[%s]",
1980                           un->sun_path, p);
1981                 errno = EINVAL;
1982                 return -1;
1983         }
1984
1985         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1986                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1987                           type, iface, prt);
1988                 errno = EINVAL;
1989                 return -1;
1990         }
1991
1992         if (prt > 0xFFFF) {
1993                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1994                           type, iface, prt);
1995                 errno = EINVAL;
1996                 return -1;
1997         }
1998
1999         SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u",
2000                   type, iface, prt);
2001
2002         switch(type) {
2003         case SOCKET_TYPE_CHAR_TCP:
2004         case SOCKET_TYPE_CHAR_UDP: {
2005                 struct sockaddr_in *in2 = (struct sockaddr_in *)(void *)in;
2006
2007                 if ((*len) < sizeof(*in2)) {
2008                         SWRAP_LOG(SWRAP_LOG_ERROR,
2009                                   "V4: *len(%zu) < sizeof(*in2)=%zu",
2010                                   (size_t)*len, sizeof(*in2));
2011                         errno = EINVAL;
2012                         return -1;
2013                 }
2014
2015                 memset(in2, 0, sizeof(*in2));
2016                 in2->sin_family = AF_INET;
2017                 in2->sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
2018                 in2->sin_port = htons(prt);
2019
2020                 *len = sizeof(*in2);
2021                 break;
2022         }
2023 #ifdef HAVE_IPV6
2024         case SOCKET_TYPE_CHAR_TCP_V6:
2025         case SOCKET_TYPE_CHAR_UDP_V6: {
2026                 struct sockaddr_in6 *in2 = (struct sockaddr_in6 *)(void *)in;
2027
2028                 if ((*len) < sizeof(*in2)) {
2029                         SWRAP_LOG(SWRAP_LOG_ERROR,
2030                                   "V6: *len(%zu) < sizeof(*in2)=%zu",
2031                                   (size_t)*len, sizeof(*in2));
2032                         SWRAP_LOG(SWRAP_LOG_ERROR, "LINE:%d", __LINE__);
2033                         errno = EINVAL;
2034                         return -1;
2035                 }
2036
2037                 memset(in2, 0, sizeof(*in2));
2038                 in2->sin6_family = AF_INET6;
2039                 in2->sin6_addr = *swrap_ipv6();
2040                 in2->sin6_addr.s6_addr[15] = iface;
2041                 in2->sin6_port = htons(prt);
2042
2043                 *len = sizeof(*in2);
2044                 break;
2045         }
2046 #endif
2047         default:
2048                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
2049                           type, iface, prt);
2050                 errno = EINVAL;
2051                 return -1;
2052         }
2053
2054         return 0;
2055 }
2056
2057 static int convert_in_un_remote(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
2058                                 int *bcast)
2059 {
2060         char type = '\0';
2061         unsigned int prt;
2062         unsigned int iface;
2063         int is_bcast = 0;
2064         char *swrap_dir = NULL;
2065
2066         if (bcast) *bcast = 0;
2067
2068         switch (inaddr->sa_family) {
2069         case AF_INET: {
2070                 const struct sockaddr_in *in =
2071                     (const struct sockaddr_in *)(const void *)inaddr;
2072                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2073                 char u_type = '\0';
2074                 char b_type = '\0';
2075                 char a_type = '\0';
2076                 const unsigned int sw_net_addr = swrap_ipv4_net();
2077                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2078
2079                 switch (si->type) {
2080                 case SOCK_STREAM:
2081                         u_type = SOCKET_TYPE_CHAR_TCP;
2082                         break;
2083                 case SOCK_DGRAM:
2084                         u_type = SOCKET_TYPE_CHAR_UDP;
2085                         a_type = SOCKET_TYPE_CHAR_UDP;
2086                         b_type = SOCKET_TYPE_CHAR_UDP;
2087                         break;
2088                 default:
2089                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2090                         errno = ESOCKTNOSUPPORT;
2091                         return -1;
2092                 }
2093
2094                 prt = ntohs(in->sin_port);
2095                 if (a_type && addr == 0xFFFFFFFF) {
2096                         /* 255.255.255.255 only udp */
2097                         is_bcast = 2;
2098                         type = a_type;
2099                         iface = socket_wrapper_default_iface();
2100                 } else if (b_type && addr == sw_bcast_addr) {
2101                         /*
2102                          * 127.255.255.255
2103                          * or
2104                          * 10.255.255.255
2105                          * only udp
2106                          */
2107                         is_bcast = 1;
2108                         type = b_type;
2109                         iface = socket_wrapper_default_iface();
2110                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2111                         /* 127.0.0.X or 10.53.57.X */
2112                         is_bcast = 0;
2113                         type = u_type;
2114                         iface = (addr & 0x000000FF);
2115                 } else {
2116                         struct swrap_sockaddr_buf buf = {};
2117                         SWRAP_LOG(SWRAP_LOG_WARN,
2118                                   "%s",
2119                                   swrap_sockaddr_string(&buf, inaddr));
2120                         errno = ENETUNREACH;
2121                         return -1;
2122                 }
2123                 if (bcast) *bcast = is_bcast;
2124                 break;
2125         }
2126 #ifdef HAVE_IPV6
2127         case AF_INET6: {
2128                 const struct sockaddr_in6 *in =
2129                     (const struct sockaddr_in6 *)(const void *)inaddr;
2130                 struct in6_addr cmp1, cmp2;
2131
2132                 switch (si->type) {
2133                 case SOCK_STREAM:
2134                         type = SOCKET_TYPE_CHAR_TCP_V6;
2135                         break;
2136                 case SOCK_DGRAM:
2137                         type = SOCKET_TYPE_CHAR_UDP_V6;
2138                         break;
2139                 default:
2140                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2141                         errno = ESOCKTNOSUPPORT;
2142                         return -1;
2143                 }
2144
2145                 /* XXX no multicast/broadcast */
2146
2147                 prt = ntohs(in->sin6_port);
2148
2149                 cmp1 = *swrap_ipv6();
2150                 cmp2 = in->sin6_addr;
2151                 cmp2.s6_addr[15] = 0;
2152                 if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2153                         iface = in->sin6_addr.s6_addr[15];
2154                 } else {
2155                         struct swrap_sockaddr_buf buf = {};
2156                         SWRAP_LOG(SWRAP_LOG_WARN,
2157                                   "%s",
2158                                   swrap_sockaddr_string(&buf, inaddr));
2159                         errno = ENETUNREACH;
2160                         return -1;
2161                 }
2162
2163                 break;
2164         }
2165 #endif
2166         default:
2167                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family!");
2168                 errno = ENETUNREACH;
2169                 return -1;
2170         }
2171
2172         if (prt == 0) {
2173                 SWRAP_LOG(SWRAP_LOG_WARN, "Port not set");
2174                 errno = EINVAL;
2175                 return -1;
2176         }
2177
2178         swrap_dir = socket_wrapper_dir();
2179         if (swrap_dir == NULL) {
2180                 errno = EINVAL;
2181                 return -1;
2182         }
2183
2184         if (is_bcast) {
2185                 swrap_un_path_EINVAL(un, swrap_dir);
2186                 SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2187                 SAFE_FREE(swrap_dir);
2188                 /* the caller need to do more processing */
2189                 return 0;
2190         }
2191
2192         swrap_un_path(un, swrap_dir, type, iface, prt);
2193         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2194
2195         SAFE_FREE(swrap_dir);
2196
2197         return 0;
2198 }
2199
2200 static int convert_in_un_alloc(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
2201                                int *bcast)
2202 {
2203         char type = '\0';
2204         unsigned int prt;
2205         unsigned int iface;
2206         struct stat st;
2207         int is_bcast = 0;
2208         char *swrap_dir = NULL;
2209
2210         if (bcast) *bcast = 0;
2211
2212         switch (si->family) {
2213         case AF_INET: {
2214                 const struct sockaddr_in *in =
2215                     (const struct sockaddr_in *)(const void *)inaddr;
2216                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2217                 char u_type = '\0';
2218                 char d_type = '\0';
2219                 char b_type = '\0';
2220                 char a_type = '\0';
2221                 const unsigned int sw_net_addr = swrap_ipv4_net();
2222                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2223
2224                 prt = ntohs(in->sin_port);
2225
2226                 switch (si->type) {
2227                 case SOCK_STREAM:
2228                         u_type = SOCKET_TYPE_CHAR_TCP;
2229                         d_type = SOCKET_TYPE_CHAR_TCP;
2230                         break;
2231                 case SOCK_DGRAM:
2232                         u_type = SOCKET_TYPE_CHAR_UDP;
2233                         d_type = SOCKET_TYPE_CHAR_UDP;
2234                         a_type = SOCKET_TYPE_CHAR_UDP;
2235                         b_type = SOCKET_TYPE_CHAR_UDP;
2236                         break;
2237                 default:
2238                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2239                         errno = ESOCKTNOSUPPORT;
2240                         return -1;
2241                 }
2242
2243                 if (addr == 0) {
2244                         /* 0.0.0.0 */
2245                         is_bcast = 0;
2246                         type = d_type;
2247                         iface = socket_wrapper_default_iface();
2248                 } else if (a_type && addr == 0xFFFFFFFF) {
2249                         /* 255.255.255.255 only udp */
2250                         is_bcast = 2;
2251                         type = a_type;
2252                         iface = socket_wrapper_default_iface();
2253                 } else if (b_type && addr == sw_bcast_addr) {
2254                         /* 127.255.255.255 only udp */
2255                         is_bcast = 1;
2256                         type = b_type;
2257                         iface = socket_wrapper_default_iface();
2258                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2259                         /* 127.0.0.X */
2260                         is_bcast = 0;
2261                         type = u_type;
2262                         iface = (addr & 0x000000FF);
2263                 } else {
2264                         errno = EADDRNOTAVAIL;
2265                         return -1;
2266                 }
2267
2268                 /* Store the bind address for connect() */
2269                 if (si->bindname.sa_socklen == 0) {
2270                         struct sockaddr_in bind_in;
2271                         socklen_t blen = sizeof(struct sockaddr_in);
2272
2273                         ZERO_STRUCT(bind_in);
2274                         bind_in.sin_family = in->sin_family;
2275                         bind_in.sin_port = in->sin_port;
2276                         bind_in.sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
2277                         si->bindname.sa_socklen = blen;
2278                         memcpy(&si->bindname.sa.in, &bind_in, blen);
2279                 }
2280
2281                 break;
2282         }
2283 #ifdef HAVE_IPV6
2284         case AF_INET6: {
2285                 const struct sockaddr_in6 *in =
2286                     (const struct sockaddr_in6 *)(const void *)inaddr;
2287                 struct in6_addr cmp1, cmp2;
2288
2289                 switch (si->type) {
2290                 case SOCK_STREAM:
2291                         type = SOCKET_TYPE_CHAR_TCP_V6;
2292                         break;
2293                 case SOCK_DGRAM:
2294                         type = SOCKET_TYPE_CHAR_UDP_V6;
2295                         break;
2296                 default:
2297                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2298                         errno = ESOCKTNOSUPPORT;
2299                         return -1;
2300                 }
2301
2302                 /* XXX no multicast/broadcast */
2303
2304                 prt = ntohs(in->sin6_port);
2305
2306                 cmp1 = *swrap_ipv6();
2307                 cmp2 = in->sin6_addr;
2308                 cmp2.s6_addr[15] = 0;
2309                 if (IN6_IS_ADDR_UNSPECIFIED(&in->sin6_addr)) {
2310                         iface = socket_wrapper_default_iface();
2311                 } else if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2312                         iface = in->sin6_addr.s6_addr[15];
2313                 } else {
2314                         errno = EADDRNOTAVAIL;
2315                         return -1;
2316                 }
2317
2318                 /* Store the bind address for connect() */
2319                 if (si->bindname.sa_socklen == 0) {
2320                         struct sockaddr_in6 bind_in;
2321                         socklen_t blen = sizeof(struct sockaddr_in6);
2322
2323                         ZERO_STRUCT(bind_in);
2324                         bind_in.sin6_family = in->sin6_family;
2325                         bind_in.sin6_port = in->sin6_port;
2326
2327                         bind_in.sin6_addr = *swrap_ipv6();
2328                         bind_in.sin6_addr.s6_addr[15] = iface;
2329
2330                         memcpy(&si->bindname.sa.in6, &bind_in, blen);
2331                         si->bindname.sa_socklen = blen;
2332                 }
2333
2334                 break;
2335         }
2336 #endif
2337         default:
2338                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2339                 errno = EADDRNOTAVAIL;
2340                 return -1;
2341         }
2342
2343
2344         if (bcast) *bcast = is_bcast;
2345
2346         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
2347                 errno = EINVAL;
2348                 return -1;
2349         }
2350
2351         swrap_dir = socket_wrapper_dir();
2352         if (swrap_dir == NULL) {
2353                 errno = EINVAL;
2354                 return -1;
2355         }
2356
2357         if (prt == 0) {
2358                 /* handle auto-allocation of ephemeral ports */
2359                 for (prt = 5001; prt < 10000; prt++) {
2360                         swrap_un_path(un, swrap_dir, type, iface, prt);
2361                         if (stat(un->sun_path, &st) == 0) continue;
2362
2363                         set_port(si->family, prt, &si->myname);
2364                         set_port(si->family, prt, &si->bindname);
2365
2366                         break;
2367                 }
2368
2369                 if (prt == 10000) {
2370                         errno = ENFILE;
2371                         SAFE_FREE(swrap_dir);
2372                         return -1;
2373                 }
2374         }
2375
2376         swrap_un_path(un, swrap_dir, type, iface, prt);
2377         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2378
2379         SAFE_FREE(swrap_dir);
2380
2381         return 0;
2382 }
2383
/*
 * Look up the socket_info associated with fd.
 *
 * Returns NULL when find_socket_info_index() reports -1 for fd,
 * otherwise the entry swrap_get_socket_info() returns for that index.
 */
static struct socket_info *find_socket_info(int fd)
{
	const int si_idx = find_socket_info_index(fd);

	return (si_idx == -1) ? NULL : swrap_get_socket_info(si_idx);
}
2394
#if 0 /* FIXME */
/*
 * Disabled code: everything up to the matching #endif is compiled out.
 *
 * Reports whether one of the known sockets already has a local name with
 * the same family, port and address as 'sa'.
 *
 * sa:  address to look for.
 * len: length of 'sa'; too short for the family yields false.
 *
 * Returns true on the first match, false otherwise (including for
 * families other than AF_INET/AF_INET6).
 *
 * NOTE(review): this walks a 'socket_fds' linked list and treats
 * 'myname' as a pointer, while the enabled code in this file accesses
 * 'si->myname' and 'si->bindname' as embedded members. It presumably
 * needs porting before it can be re-enabled.
 */
static bool check_addr_port_in_use(const struct sockaddr *sa, socklen_t len)
{
	struct socket_info_fd *f;
	const struct socket_info *last_s = NULL;

	/* first catch invalid input */
	switch (sa->sa_family) {
	case AF_INET:
		if (len < sizeof(struct sockaddr_in)) {
			return false;
		}
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		if (len < sizeof(struct sockaddr_in6)) {
			return false;
		}
		break;
#endif
	default:
		return false;
		break;
	}

	for (f = socket_fds; f; f = f->next) {
		struct socket_info *s = swrap_get_socket_info(f->si_index);

		/* Skip an entry that refers to the same socket_info as the previous one. */
		if (s == last_s) {
			continue;
		}
		last_s = s;

		if (s->myname == NULL) {
			continue;
		}
		if (s->myname->sa_family != sa->sa_family) {
			continue;
		}
		switch (s->myname->sa_family) {
		case AF_INET: {
			struct sockaddr_in *sin1, *sin2;

			sin1 = (struct sockaddr_in *)s->myname;
			sin2 = (struct sockaddr_in *)sa;

			/* A socket whose local address is INADDR_ANY never counts as a match. */
			if (sin1->sin_addr.s_addr == htonl(INADDR_ANY)) {
				continue;
			}
			if (sin1->sin_port != sin2->sin_port) {
				continue;
			}
			if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr) {
				continue;
			}

			/* found */
			return true;
			break;
		}
#ifdef HAVE_IPV6
		case AF_INET6: {
			struct sockaddr_in6 *sin1, *sin2;

			sin1 = (struct sockaddr_in6 *)s->myname;
			sin2 = (struct sockaddr_in6 *)sa;

			if (sin1->sin6_port != sin2->sin6_port) {
				continue;
			}
			if (!IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
						&sin2->sin6_addr))
			{
				continue;
			}

			/* found */
			return true;
			break;
		}
#endif
		default:
			continue;
			break;

		}
	}

	return false;
}
#endif
2486
2487 static void swrap_remove_stale(int fd);
2488
2489 static int sockaddr_convert_to_un(struct socket_info *si,
2490                                   const struct sockaddr *in_addr,
2491                                   socklen_t in_len,
2492                                   struct sockaddr_un *out_addr,
2493                                   int alloc_sock,
2494                                   int *bcast)
2495 {
2496         struct sockaddr *out = (struct sockaddr *)(void *)out_addr;
2497
2498         (void) in_len; /* unused */
2499
2500         if (out_addr == NULL) {
2501                 return 0;
2502         }
2503
2504         out->sa_family = AF_UNIX;
2505 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2506         out->sa_len = sizeof(*out_addr);
2507 #endif
2508
2509         switch (in_addr->sa_family) {
2510         case AF_UNSPEC: {
2511                 const struct sockaddr_in *sin;
2512                 if (si->family != AF_INET) {
2513                         break;
2514                 }
2515                 if (in_len < sizeof(struct sockaddr_in)) {
2516                         break;
2517                 }
2518                 sin = (const struct sockaddr_in *)(const void *)in_addr;
2519                 if(sin->sin_addr.s_addr != htonl(INADDR_ANY)) {
2520                         break;
2521                 }
2522
2523                 /*
2524                  * Note: in the special case of AF_UNSPEC and INADDR_ANY,
2525                  * AF_UNSPEC is mapped to AF_INET and must be treated here.
2526                  */
2527
2528                 FALL_THROUGH;
2529         }
2530         case AF_INET:
2531 #ifdef HAVE_IPV6
2532         case AF_INET6:
2533 #endif
2534                 switch (si->type) {
2535                 case SOCK_STREAM:
2536                 case SOCK_DGRAM:
2537                         break;
2538                 default:
2539                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2540                         errno = ESOCKTNOSUPPORT;
2541                         return -1;
2542                 }
2543                 if (alloc_sock) {
2544                         return convert_in_un_alloc(si, in_addr, out_addr, bcast);
2545                 } else {
2546                         return convert_in_un_remote(si, in_addr, out_addr, bcast);
2547                 }
2548         default:
2549                 break;
2550         }
2551
2552         errno = EAFNOSUPPORT;
2553         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2554         return -1;
2555 }
2556
2557 static int sockaddr_convert_from_un(const struct socket_info *si,
2558                                     const struct sockaddr_un *in_addr,
2559                                     socklen_t un_addrlen,
2560                                     int family,
2561                                     struct sockaddr *out_addr,
2562                                     socklen_t *out_addrlen)
2563 {
2564         int ret;
2565
2566         if (out_addr == NULL || out_addrlen == NULL)
2567                 return 0;
2568
2569         if (un_addrlen == 0) {
2570                 *out_addrlen = 0;
2571                 return 0;
2572         }
2573
2574         switch (family) {
2575         case AF_INET:
2576 #ifdef HAVE_IPV6
2577         case AF_INET6:
2578 #endif
2579                 switch (si->type) {
2580                 case SOCK_STREAM:
2581                 case SOCK_DGRAM:
2582                         break;
2583                 default:
2584                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2585                         errno = ESOCKTNOSUPPORT;
2586                         return -1;
2587                 }
2588                 ret = convert_un_in(in_addr, out_addr, out_addrlen);
2589 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2590                 out_addr->sa_len = *out_addrlen;
2591 #endif
2592                 return ret;
2593         default:
2594                 break;
2595         }
2596
2597         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2598         errno = EAFNOSUPPORT;
2599         return -1;
2600 }
2601
/*
 * Kinds of socket events, named after the socket call and the phase or
 * outcome they stand for. Values are consecutive, starting at 0.
 * NOTE(review): presumably consumed by the pcap dumping code further
 * down in this file - confirm against the users of this enum.
 */
enum swrap_packet_type {
	/* connect() */
	SWRAP_CONNECT_SEND = 0,
	SWRAP_CONNECT_UNREACH,
	SWRAP_CONNECT_RECV,
	SWRAP_CONNECT_ACK,

	/* accept() */
	SWRAP_ACCEPT_SEND,
	SWRAP_ACCEPT_RECV,
	SWRAP_ACCEPT_ACK,

	/* recvfrom() / sendto() */
	SWRAP_RECVFROM,
	SWRAP_SENDTO,
	SWRAP_SENDTO_UNREACH,

	/* recv() / send() and resets */
	SWRAP_PENDING_RST,
	SWRAP_RECV,
	SWRAP_RECV_RST,
	SWRAP_SEND,
	SWRAP_SEND_RST,

	/* close() */
	SWRAP_CLOSE_SEND,
	SWRAP_CLOSE_RECV,
	SWRAP_CLOSE_ACK,
};
2622
/*
 * Header record of the capture file.
 * NOTE(review): the field set looks like the libpcap savefile global
 * header - confirm against the code that fills it in.
 *
 * swrap_pcap_init_file() refuses to enable capturing unless
 * sizeof(struct swrap_file_hdr) equals SWRAP_FILE_HDR_SIZE, i.e. unless
 * the compiler added no padding.
 */
struct swrap_file_hdr {
	uint32_t magic;
	uint16_t version_major;
	uint16_t version_minor;
	int32_t  timezone;
	uint32_t sigfigs;
	uint32_t frame_max_len;
	uint32_t link_type;
};
#define SWRAP_FRAME_LENGTH_MAX 0xFFFF
#define SWRAP_FILE_HDR_SIZE 24
2634
/*
 * Per-packet record header: a timestamp split into seconds and
 * microseconds, followed by the recorded and the full packet length.
 * Must be exactly SWRAP_PACKET_FRAME_SIZE bytes (checked in
 * swrap_pcap_init_file()).
 */
struct swrap_packet_frame {
	uint32_t seconds;
	uint32_t micro_seconds;
	uint32_t recorded_length;
	uint32_t full_length;
};
#define SWRAP_PACKET_FRAME_SIZE 16
2642
/*
 * IP header of a recorded packet, either in its IPv4 (v4) or in its IPv6
 * (v6) form. The union is as large as the bigger (v6) member.
 *
 * The SWRAP_PACKET_IP_*_SIZE constants state the expected sizes without
 * padding; swrap_pcap_init_file() compares them with sizeof.
 */
union swrap_packet_ip {
	struct {
		uint8_t  ver_hdrlen;
		uint8_t  tos;
		uint16_t packet_length;
		uint16_t identification;
		uint8_t  flags;
		uint8_t  fragment;
		uint8_t  ttl;
		uint8_t  protocol;
		uint16_t hdr_checksum;
		uint32_t src_addr;
		uint32_t dest_addr;
	} v4;
	struct {
		uint8_t  ver_prio;
		uint8_t  flow_label_high;
		uint16_t flow_label_low;
		uint16_t payload_length;
		uint8_t  next_header;
		uint8_t  hop_limit;
		uint8_t  src_addr[16];
		uint8_t  dest_addr[16];
	} v6;
};
#define SWRAP_PACKET_IP_V4_SIZE 20
#define SWRAP_PACKET_IP_V6_SIZE 40
#define SWRAP_PACKET_IP_SIZE 40
2671
/*
 * Transport level header that follows the IP header of a recorded
 * packet: TCP, UDP, ICMPv4 or ICMPv6. The union is as large as the
 * biggest (tcp) member.
 *
 * The SWRAP_PACKET_PAYLOAD_*_SIZE constants state the expected sizes
 * without padding; swrap_pcap_init_file() compares them with sizeof.
 */
union swrap_packet_payload {
	struct {
		uint16_t source_port;
		uint16_t dest_port;
		uint32_t seq_num;
		uint32_t ack_num;
		uint8_t  hdr_length;
		uint8_t  control;
		uint16_t window;
		uint16_t checksum;
		uint16_t urg;
	} tcp;
	struct {
		uint16_t source_port;
		uint16_t dest_port;
		uint16_t length;
		uint16_t checksum;
	} udp;
	struct {
		uint8_t  type;
		uint8_t  code;
		uint16_t checksum;
		uint32_t unused;
	} icmp4;
	struct {
		uint8_t  type;
		uint8_t  code;
		uint16_t checksum;
		uint32_t unused;
	} icmp6;
};
#define SWRAP_PACKET_PAYLOAD_TCP_SIZE 20
#define SWRAP_PACKET_PAYLOAD_UDP_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP4_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP6_SIZE 8
#define SWRAP_PACKET_PAYLOAD_SIZE 20
2708
/* Frame header plus the largest IP header plus the largest payload header. */
#define SWRAP_PACKET_MIN_ALLOC \
	(SWRAP_PACKET_FRAME_SIZE + SWRAP_PACKET_IP_SIZE + SWRAP_PACKET_PAYLOAD_SIZE)
2713
2714 static const char *swrap_pcap_init_file(void)
2715 {
2716         static int initialized = 0;
2717         static const char *s = NULL;
2718         static const struct swrap_file_hdr h;
2719         static const struct swrap_packet_frame f;
2720         static const union swrap_packet_ip i;
2721         static const union swrap_packet_payload p;
2722
2723         if (initialized == 1) {
2724                 return s;
2725         }
2726         initialized = 1;
2727
2728         /*
2729          * TODO: don't use the structs use plain buffer offsets
2730          *       and PUSH_U8(), PUSH_U16() and PUSH_U32()
2731          *
2732          * for now make sure we disable PCAP support
2733          * if the struct has alignment!
2734          */
2735         if (sizeof(h) != SWRAP_FILE_HDR_SIZE) {
2736                 return NULL;
2737         }
2738         if (sizeof(f) != SWRAP_PACKET_FRAME_SIZE) {
2739                 return NULL;
2740         }
2741         if (sizeof(i) != SWRAP_PACKET_IP_SIZE) {
2742                 return NULL;
2743         }
2744         if (sizeof(i.v4) != SWRAP_PACKET_IP_V4_SIZE) {
2745                 return NULL;
2746         }
2747         if (sizeof(i.v6) != SWRAP_PACKET_IP_V6_SIZE) {
2748                 return NULL;
2749         }
2750         if (sizeof(p) != SWRAP_PACKET_PAYLOAD_SIZE) {
2751                 return NULL;
2752         }
2753         if (sizeof(p.tcp) != SWRAP_PACKET_PAYLOAD_TCP_SIZE) {
2754                 return NULL;
2755         }
2756         if (sizeof(p.udp) != SWRAP_PACKET_PAYLOAD_UDP_SIZE) {
2757                 return NULL;
2758         }
2759         if (sizeof(p.icmp4) != SWRAP_PACKET_PAYLOAD_ICMP4_SIZE) {
2760                 return NULL;
2761         }
2762         if (sizeof(p.icmp6) != SWRAP_PACKET_PAYLOAD_ICMP6_SIZE) {
2763                 return NULL;
2764         }
2765
2766         s = getenv("SOCKET_WRAPPER_PCAP_FILE");
2767         if (s == NULL) {
2768                 return NULL;
2769         }
2770         if (strncmp(s, "./", 2) == 0) {
2771                 s += 2;
2772         }
2773         SWRAP_LOG(SWRAP_LOG_TRACE, "SOCKET_WRAPPER_PCAP_FILE: %s", s);
2774         return s;
2775 }
2776
2777 static uint8_t *swrap_pcap_packet_init(struct timeval *tval,
2778                                        const struct sockaddr *src,
2779                                        const struct sockaddr *dest,
2780                                        int socket_type,
2781                                        const uint8_t *payload,
2782                                        size_t payload_len,
2783                                        unsigned long tcp_seqno,
2784                                        unsigned long tcp_ack,
2785                                        unsigned char tcp_ctl,
2786                                        int unreachable,
2787                                        size_t *_packet_len)
2788 {
2789         uint8_t *base = NULL;
2790         uint8_t *buf = NULL;
2791         union {
2792                 uint8_t *ptr;
2793                 struct swrap_packet_frame *frame;
2794         } f;
2795         union {
2796                 uint8_t *ptr;
2797                 union swrap_packet_ip *ip;
2798         } i;
2799         union swrap_packet_payload *pay;
2800         size_t packet_len;
2801         size_t alloc_len;
2802         size_t nonwire_len = sizeof(struct swrap_packet_frame);
2803         size_t wire_hdr_len = 0;
2804         size_t wire_len = 0;
2805         size_t ip_hdr_len = 0;
2806         size_t icmp_hdr_len = 0;
2807         size_t icmp_truncate_len = 0;
2808         uint8_t protocol = 0, icmp_protocol = 0;
2809         const struct sockaddr_in *src_in = NULL;
2810         const struct sockaddr_in *dest_in = NULL;
2811 #ifdef HAVE_IPV6
2812         const struct sockaddr_in6 *src_in6 = NULL;
2813         const struct sockaddr_in6 *dest_in6 = NULL;
2814 #endif
2815         uint16_t src_port;
2816         uint16_t dest_port;
2817
2818         switch (src->sa_family) {
2819         case AF_INET:
2820                 src_in = (const struct sockaddr_in *)(const void *)src;
2821                 dest_in = (const struct sockaddr_in *)(const void *)dest;
2822                 src_port = src_in->sin_port;
2823                 dest_port = dest_in->sin_port;
2824                 ip_hdr_len = sizeof(i.ip->v4);
2825                 break;
2826 #ifdef HAVE_IPV6
2827         case AF_INET6:
2828                 src_in6 = (const struct sockaddr_in6 *)(const void *)src;
2829                 dest_in6 = (const struct sockaddr_in6 *)(const void *)dest;
2830                 src_port = src_in6->sin6_port;
2831                 dest_port = dest_in6->sin6_port;
2832                 ip_hdr_len = sizeof(i.ip->v6);
2833                 break;
2834 #endif
2835         default:
2836                 return NULL;
2837         }
2838
2839         switch (socket_type) {
2840         case SOCK_STREAM:
2841                 protocol = 0x06; /* TCP */
2842                 wire_hdr_len = ip_hdr_len + sizeof(pay->tcp);
2843                 wire_len = wire_hdr_len + payload_len;
2844                 break;
2845
2846         case SOCK_DGRAM:
2847                 protocol = 0x11; /* UDP */
2848                 wire_hdr_len = ip_hdr_len + sizeof(pay->udp);
2849                 wire_len = wire_hdr_len + payload_len;
2850                 break;
2851
2852         default:
2853                 return NULL;
2854         }
2855
2856         if (unreachable) {
2857                 icmp_protocol = protocol;
2858                 switch (src->sa_family) {
2859                 case AF_INET:
2860                         protocol = 0x01; /* ICMPv4 */
2861                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp4);
2862                         break;
2863 #ifdef HAVE_IPV6
2864                 case AF_INET6:
2865                         protocol = 0x3A; /* ICMPv6 */
2866                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp6);
2867                         break;
2868 #endif
2869                 }
2870                 if (wire_len > 64 ) {
2871                         icmp_truncate_len = wire_len - 64;
2872                 }
2873                 wire_len += icmp_hdr_len;
2874         }
2875
2876         packet_len = nonwire_len + wire_len;
2877         alloc_len = packet_len;
2878         if (alloc_len < SWRAP_PACKET_MIN_ALLOC) {
2879                 alloc_len = SWRAP_PACKET_MIN_ALLOC;
2880         }
2881
2882         base = (uint8_t *)calloc(1, alloc_len);
2883         if (base == NULL) {
2884                 return NULL;
2885         }
2886
2887         buf = base;
2888         f.ptr = buf;
2889
2890         f.frame->seconds                = tval->tv_sec;
2891         f.frame->micro_seconds  = tval->tv_usec;
2892         f.frame->recorded_length        = wire_len - icmp_truncate_len;
2893         f.frame->full_length    = wire_len - icmp_truncate_len;
2894
2895         buf += SWRAP_PACKET_FRAME_SIZE;
2896
2897         i.ptr = buf;
2898         switch (src->sa_family) {
2899         case AF_INET:
2900                 if (src_in == NULL || dest_in == NULL) {
2901                         SAFE_FREE(base);
2902                         return NULL;
2903                 }
2904
2905                 i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2906                 i.ip->v4.tos            = 0x00;
2907                 i.ip->v4.packet_length  = htons(wire_len - icmp_truncate_len);
2908                 i.ip->v4.identification = htons(0xFFFF);
2909                 i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2910                 i.ip->v4.fragment       = htons(0x0000);
2911                 i.ip->v4.ttl            = 0xFF;
2912                 i.ip->v4.protocol       = protocol;
2913                 i.ip->v4.hdr_checksum   = htons(0x0000);
2914                 i.ip->v4.src_addr       = src_in->sin_addr.s_addr;
2915                 i.ip->v4.dest_addr      = dest_in->sin_addr.s_addr;
2916                 buf += SWRAP_PACKET_IP_V4_SIZE;
2917                 break;
2918 #ifdef HAVE_IPV6
2919         case AF_INET6:
2920                 if (src_in6 == NULL || dest_in6 == NULL) {
2921                         SAFE_FREE(base);
2922                         return NULL;
2923                 }
2924
2925                 i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2926                 i.ip->v6.flow_label_high        = 0x00;
2927                 i.ip->v6.flow_label_low = 0x0000;
2928                 i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2929                 i.ip->v6.next_header    = protocol;
2930                 memcpy(i.ip->v6.src_addr, src_in6->sin6_addr.s6_addr, 16);
2931                 memcpy(i.ip->v6.dest_addr, dest_in6->sin6_addr.s6_addr, 16);
2932                 buf += SWRAP_PACKET_IP_V6_SIZE;
2933                 break;
2934 #endif
2935         }
2936
2937         if (unreachable) {
2938                 pay = (union swrap_packet_payload *)(void *)buf;
2939                 switch (src->sa_family) {
2940                 case AF_INET:
2941                         pay->icmp4.type         = 0x03; /* destination unreachable */
2942                         pay->icmp4.code         = 0x01; /* host unreachable */
2943                         pay->icmp4.checksum     = htons(0x0000);
2944                         pay->icmp4.unused       = htonl(0x00000000);
2945
2946                         buf += SWRAP_PACKET_PAYLOAD_ICMP4_SIZE;
2947
2948                         /* set the ip header in the ICMP payload */
2949                         i.ptr = buf;
2950                         i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2951                         i.ip->v4.tos            = 0x00;
2952                         i.ip->v4.packet_length  = htons(wire_len - icmp_hdr_len);
2953                         i.ip->v4.identification = htons(0xFFFF);
2954                         i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2955                         i.ip->v4.fragment       = htons(0x0000);
2956                         i.ip->v4.ttl            = 0xFF;
2957                         i.ip->v4.protocol       = icmp_protocol;
2958                         i.ip->v4.hdr_checksum   = htons(0x0000);
2959                         i.ip->v4.src_addr       = dest_in->sin_addr.s_addr;
2960                         i.ip->v4.dest_addr      = src_in->sin_addr.s_addr;
2961
2962                         buf += SWRAP_PACKET_IP_V4_SIZE;
2963
2964                         src_port = dest_in->sin_port;
2965                         dest_port = src_in->sin_port;
2966                         break;
2967 #ifdef HAVE_IPV6
2968                 case AF_INET6:
2969                         pay->icmp6.type         = 0x01; /* destination unreachable */
2970                         pay->icmp6.code         = 0x03; /* address unreachable */
2971                         pay->icmp6.checksum     = htons(0x0000);
2972                         pay->icmp6.unused       = htonl(0x00000000);
2973                         buf += SWRAP_PACKET_PAYLOAD_ICMP6_SIZE;
2974
2975                         /* set the ip header in the ICMP payload */
2976                         i.ptr = buf;
2977                         i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2978                         i.ip->v6.flow_label_high        = 0x00;
2979                         i.ip->v6.flow_label_low = 0x0000;
2980                         i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2981                         i.ip->v6.next_header    = protocol;
2982                         memcpy(i.ip->v6.src_addr, dest_in6->sin6_addr.s6_addr, 16);
2983                         memcpy(i.ip->v6.dest_addr, src_in6->sin6_addr.s6_addr, 16);
2984
2985                         buf += SWRAP_PACKET_IP_V6_SIZE;
2986
2987                         src_port = dest_in6->sin6_port;
2988                         dest_port = src_in6->sin6_port;
2989                         break;
2990 #endif
2991                 }
2992         }
2993
2994         pay = (union swrap_packet_payload *)(void *)buf;
2995
2996         switch (socket_type) {
2997         case SOCK_STREAM:
2998                 pay->tcp.source_port    = src_port;
2999                 pay->tcp.dest_port      = dest_port;
3000                 pay->tcp.seq_num        = htonl(tcp_seqno);
3001                 pay->tcp.ack_num        = htonl(tcp_ack);
3002                 pay->tcp.hdr_length     = 0x50; /* 5 * 32 bit words */
3003                 pay->tcp.control        = tcp_ctl;
3004                 pay->tcp.window         = htons(0x7FFF);
3005                 pay->tcp.checksum       = htons(0x0000);
3006                 pay->tcp.urg            = htons(0x0000);
3007                 buf += SWRAP_PACKET_PAYLOAD_TCP_SIZE;
3008
3009                 break;
3010
3011         case SOCK_DGRAM:
3012                 pay->udp.source_port    = src_port;
3013                 pay->udp.dest_port      = dest_port;
3014                 pay->udp.length         = htons(8 + payload_len);
3015                 pay->udp.checksum       = htons(0x0000);
3016                 buf += SWRAP_PACKET_PAYLOAD_UDP_SIZE;
3017
3018                 break;
3019         }
3020
3021         if (payload && payload_len > 0) {
3022                 memcpy(buf, payload, payload_len);
3023         }
3024
3025         *_packet_len = packet_len - icmp_truncate_len;
3026         return base;
3027 }
3028
3029 static int swrap_pcap_get_fd(const char *fname)
3030 {
3031         static int fd = -1;
3032
3033         if (fd != -1) {
3034                 return fd;
3035         }
3036
3037         fd = libc_open(fname, O_WRONLY|O_CREAT|O_EXCL|O_APPEND, 0644);
3038         if (fd != -1) {
3039                 struct swrap_file_hdr file_hdr;
3040                 file_hdr.magic          = 0xA1B2C3D4;
3041                 file_hdr.version_major  = 0x0002;
3042                 file_hdr.version_minor  = 0x0004;
3043                 file_hdr.timezone       = 0x00000000;
3044                 file_hdr.sigfigs        = 0x00000000;
3045                 file_hdr.frame_max_len  = SWRAP_FRAME_LENGTH_MAX;
3046                 file_hdr.link_type      = 0x0065; /* 101 RAW IP */
3047
3048                 if (libc_write(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
3049                         libc_close(fd);
3050                         fd = -1;
3051                 }
3052                 return fd;
3053         }
3054
3055         fd = libc_open(fname, O_WRONLY|O_APPEND, 0644);
3056
3057         return fd;
3058 }
3059
3060 static uint8_t *swrap_pcap_marshall_packet(struct socket_info *si,
3061                                            const struct sockaddr *addr,
3062                                            enum swrap_packet_type type,
3063                                            const void *buf, size_t len,
3064                                            size_t *packet_len)
3065 {
3066         const struct sockaddr *src_addr;
3067         const struct sockaddr *dest_addr;
3068         unsigned long tcp_seqno = 0;
3069         unsigned long tcp_ack = 0;
3070         unsigned char tcp_ctl = 0;
3071         int unreachable = 0;
3072
3073         struct timeval tv;
3074
3075         switch (si->family) {
3076         case AF_INET:
3077                 break;
3078 #ifdef HAVE_IPV6
3079         case AF_INET6:
3080                 break;
3081 #endif
3082         default:
3083                 return NULL;
3084         }
3085
3086         switch (type) {
3087         case SWRAP_CONNECT_SEND:
3088                 if (si->type != SOCK_STREAM) {
3089                         return NULL;
3090                 }
3091
3092                 src_addr  = &si->myname.sa.s;
3093                 dest_addr = addr;
3094
3095                 tcp_seqno = si->io.pck_snd;
3096                 tcp_ack = si->io.pck_rcv;
3097                 tcp_ctl = 0x02; /* SYN */
3098
3099                 si->io.pck_snd += 1;
3100
3101                 break;
3102
3103         case SWRAP_CONNECT_RECV:
3104                 if (si->type != SOCK_STREAM) {
3105                         return NULL;
3106                 }
3107
3108                 dest_addr = &si->myname.sa.s;
3109                 src_addr = addr;
3110
3111                 tcp_seqno = si->io.pck_rcv;
3112                 tcp_ack = si->io.pck_snd;
3113                 tcp_ctl = 0x12; /** SYN,ACK */
3114
3115                 si->io.pck_rcv += 1;
3116
3117                 break;
3118
3119         case SWRAP_CONNECT_UNREACH:
3120                 if (si->type != SOCK_STREAM) {
3121                         return NULL;
3122                 }
3123
3124                 dest_addr = &si->myname.sa.s;
3125                 src_addr  = addr;
3126
3127                 /* Unreachable: resend the data of SWRAP_CONNECT_SEND */
3128                 tcp_seqno = si->io.pck_snd - 1;
3129                 tcp_ack = si->io.pck_rcv;
3130                 tcp_ctl = 0x02; /* SYN */
3131                 unreachable = 1;
3132
3133                 break;
3134
3135         case SWRAP_CONNECT_ACK:
3136                 if (si->type != SOCK_STREAM) {
3137                         return NULL;
3138                 }
3139
3140                 src_addr  = &si->myname.sa.s;
3141                 dest_addr = addr;
3142
3143                 tcp_seqno = si->io.pck_snd;
3144                 tcp_ack = si->io.pck_rcv;
3145                 tcp_ctl = 0x10; /* ACK */
3146
3147                 break;
3148
3149         case SWRAP_ACCEPT_SEND:
3150                 if (si->type != SOCK_STREAM) {
3151                         return NULL;
3152                 }
3153
3154                 dest_addr = &si->myname.sa.s;
3155                 src_addr = addr;
3156
3157                 tcp_seqno = si->io.pck_rcv;
3158                 tcp_ack = si->io.pck_snd;
3159                 tcp_ctl = 0x02; /* SYN */
3160
3161                 si->io.pck_rcv += 1;
3162
3163                 break;
3164
3165         case SWRAP_ACCEPT_RECV:
3166                 if (si->type != SOCK_STREAM) {
3167                         return NULL;
3168                 }
3169
3170                 src_addr = &si->myname.sa.s;
3171                 dest_addr = addr;
3172
3173                 tcp_seqno = si->io.pck_snd;
3174                 tcp_ack = si->io.pck_rcv;
3175                 tcp_ctl = 0x12; /* SYN,ACK */
3176
3177                 si->io.pck_snd += 1;
3178
3179                 break;
3180
3181         case SWRAP_ACCEPT_ACK:
3182                 if (si->type != SOCK_STREAM) {
3183                         return NULL;
3184                 }
3185
3186                 dest_addr = &si->myname.sa.s;
3187                 src_addr = addr;
3188
3189                 tcp_seqno = si->io.pck_rcv;
3190                 tcp_ack = si->io.pck_snd;
3191                 tcp_ctl = 0x10; /* ACK */
3192
3193                 break;
3194
3195         case SWRAP_SEND:
3196                 src_addr  = &si->myname.sa.s;
3197                 dest_addr = &si->peername.sa.s;
3198
3199                 tcp_seqno = si->io.pck_snd;
3200                 tcp_ack = si->io.pck_rcv;
3201                 tcp_ctl = 0x18; /* PSH,ACK */
3202
3203                 si->io.pck_snd += len;
3204
3205                 break;
3206
3207         case SWRAP_SEND_RST:
3208                 dest_addr = &si->myname.sa.s;
3209                 src_addr  = &si->peername.sa.s;
3210
3211                 if (si->type == SOCK_DGRAM) {
3212                         return swrap_pcap_marshall_packet(si,
3213                                                           &si->peername.sa.s,
3214                                                           SWRAP_SENDTO_UNREACH,
3215                                                           buf,
3216                                                           len,
3217                                                           packet_len);
3218                 }
3219
3220                 tcp_seqno = si->io.pck_rcv;
3221                 tcp_ack = si->io.pck_snd;
3222                 tcp_ctl = 0x14; /** RST,ACK */
3223
3224                 break;
3225
3226         case SWRAP_PENDING_RST:
3227                 dest_addr = &si->myname.sa.s;
3228                 src_addr  = &si->peername.sa.s;
3229
3230                 if (si->type == SOCK_DGRAM) {
3231                         return NULL;
3232                 }
3233
3234                 tcp_seqno = si->io.pck_rcv;
3235                 tcp_ack = si->io.pck_snd;
3236                 tcp_ctl = 0x14; /* RST,ACK */
3237
3238                 break;
3239
3240         case SWRAP_RECV:
3241                 dest_addr = &si->myname.sa.s;
3242                 src_addr  = &si->peername.sa.s;
3243
3244                 tcp_seqno = si->io.pck_rcv;
3245                 tcp_ack = si->io.pck_snd;
3246                 tcp_ctl = 0x18; /* PSH,ACK */
3247
3248                 si->io.pck_rcv += len;
3249
3250                 break;
3251
3252         case SWRAP_RECV_RST:
3253                 dest_addr = &si->myname.sa.s;
3254                 src_addr  = &si->peername.sa.s;
3255
3256                 if (si->type == SOCK_DGRAM) {
3257                         return NULL;
3258                 }
3259
3260                 tcp_seqno = si->io.pck_rcv;
3261                 tcp_ack = si->io.pck_snd;
3262                 tcp_ctl = 0x14; /* RST,ACK */
3263
3264                 break;
3265
3266         case SWRAP_SENDTO:
3267                 src_addr = &si->myname.sa.s;
3268                 dest_addr = addr;
3269
3270                 si->io.pck_snd += len;
3271
3272                 break;
3273
3274         case SWRAP_SENDTO_UNREACH:
3275                 dest_addr = &si->myname.sa.s;
3276                 src_addr = addr;
3277
3278                 unreachable = 1;
3279
3280                 break;
3281
3282         case SWRAP_RECVFROM:
3283                 dest_addr = &si->myname.sa.s;
3284                 src_addr = addr;
3285
3286                 si->io.pck_rcv += len;
3287
3288                 break;
3289
3290         case SWRAP_CLOSE_SEND:
3291                 if (si->type != SOCK_STREAM) {
3292                         return NULL;
3293                 }
3294
3295                 src_addr  = &si->myname.sa.s;
3296                 dest_addr = &si->peername.sa.s;
3297
3298                 tcp_seqno = si->io.pck_snd;
3299                 tcp_ack = si->io.pck_rcv;
3300                 tcp_ctl = 0x11; /* FIN, ACK */
3301
3302                 si->io.pck_snd += 1;
3303
3304                 break;
3305
3306         case SWRAP_CLOSE_RECV:
3307                 if (si->type != SOCK_STREAM) {
3308                         return NULL;
3309                 }
3310
3311                 dest_addr = &si->myname.sa.s;
3312                 src_addr  = &si->peername.sa.s;
3313
3314                 tcp_seqno = si->io.pck_rcv;
3315                 tcp_ack = si->io.pck_snd;
3316                 tcp_ctl = 0x11; /* FIN,ACK */
3317
3318                 si->io.pck_rcv += 1;
3319
3320                 break;
3321
3322         case SWRAP_CLOSE_ACK:
3323                 if (si->type != SOCK_STREAM) {
3324                         return NULL;
3325                 }
3326
3327                 src_addr  = &si->myname.sa.s;
3328                 dest_addr = &si->peername.sa.s;
3329
3330                 tcp_seqno = si->io.pck_snd;
3331                 tcp_ack = si->io.pck_rcv;
3332                 tcp_ctl = 0x10; /* ACK */
3333
3334                 break;
3335         default:
3336                 return NULL;
3337         }
3338
3339         swrapGetTimeOfDay(&tv);
3340
3341         return swrap_pcap_packet_init(&tv,
3342                                       src_addr,
3343                                       dest_addr,
3344                                       si->type,
3345                                       (const uint8_t *)buf,
3346                                       len,
3347                                       tcp_seqno,
3348                                       tcp_ack,
3349                                       tcp_ctl,
3350                                       unreachable,
3351                                       packet_len);
3352 }
3353
3354 static void swrap_pcap_dump_packet(struct socket_info *si,
3355                                    const struct sockaddr *addr,
3356                                    enum swrap_packet_type type,
3357                                    const void *buf, size_t len)
3358 {
3359         const char *file_name;
3360         uint8_t *packet;
3361         size_t packet_len = 0;
3362         int fd;
3363
3364         swrap_mutex_lock(&pcap_dump_mutex);
3365
3366         file_name = swrap_pcap_init_file();
3367         if (!file_name) {
3368                 goto done;
3369         }
3370
3371         packet = swrap_pcap_marshall_packet(si,
3372                                             addr,
3373                                             type,
3374                                             buf,
3375                                             len,
3376                                             &packet_len);
3377         if (packet == NULL) {
3378                 goto done;
3379         }
3380
3381         fd = swrap_pcap_get_fd(file_name);
3382         if (fd != -1) {
3383                 if (libc_write(fd, packet, packet_len) != (ssize_t)packet_len) {
3384                         free(packet);
3385                         goto done;
3386                 }
3387         }
3388
3389         free(packet);
3390
3391 done:
3392         swrap_mutex_unlock(&pcap_dump_mutex);
3393 }
3394
3395 /****************************************************************************
3396  *   SIGNALFD
3397  ***************************************************************************/
3398
3399 #ifdef HAVE_SIGNALFD
3400 static int swrap_signalfd(int fd, const sigset_t *mask, int flags)
3401 {
3402         int rc;
3403
3404         rc = libc_signalfd(fd, mask, flags);
3405         if (rc != -1) {
3406                 swrap_remove_stale(fd);
3407         }
3408
3409         return rc;
3410 }
3411
3412 int signalfd(int fd, const sigset_t *mask, int flags)
3413 {
3414         return swrap_signalfd(fd, mask, flags);
3415 }
3416 #endif
3417
3418 /****************************************************************************
3419  *   SOCKET
3420  ***************************************************************************/
3421
3422 static int swrap_socket(int family, int type, int protocol)
3423 {
3424         struct socket_info *si = NULL;
3425         struct socket_info _si = { 0 };
3426         int fd;
3427         int ret;
3428         int real_type = type;
3429
3430         /*
3431          * Remove possible addition flags passed to socket() so
3432          * do not fail checking the type.
3433          * See https://lwn.net/Articles/281965/
3434          */
3435 #ifdef SOCK_CLOEXEC
3436         real_type &= ~SOCK_CLOEXEC;
3437 #endif
3438 #ifdef SOCK_NONBLOCK
3439         real_type &= ~SOCK_NONBLOCK;
3440 #endif
3441
3442         if (!socket_wrapper_enabled()) {
3443                 return libc_socket(family, type, protocol);
3444         }
3445
3446         switch (family) {
3447         case AF_INET:
3448 #ifdef HAVE_IPV6
3449         case AF_INET6:
3450 #endif
3451                 break;
3452 #ifdef AF_NETLINK
3453         case AF_NETLINK:
3454 #endif /* AF_NETLINK */
3455 #ifdef AF_PACKET
3456         case AF_PACKET:
3457 #endif /* AF_PACKET */
3458         case AF_UNIX:
3459                 fd = libc_socket(family, type, protocol);
3460                 if (fd != -1) {
3461                         /* Check if we have a stale fd and remove it */
3462                         swrap_remove_stale(fd);
3463                         SWRAP_LOG(SWRAP_LOG_TRACE,
3464                                   "Unix socket fd=%d",
3465                                   fd);
3466                 }
3467                 return fd;
3468         default:
3469                 errno = EAFNOSUPPORT;
3470                 return -1;
3471         }
3472
3473         switch (real_type) {
3474         case SOCK_STREAM:
3475                 break;
3476         case SOCK_DGRAM:
3477                 break;
3478         default:
3479                 errno = EPROTONOSUPPORT;
3480                 return -1;
3481         }
3482
3483         switch (protocol) {
3484         case 0:
3485                 break;
3486         case 6:
3487                 if (real_type == SOCK_STREAM) {
3488                         break;
3489                 }
3490                 FALL_THROUGH;
3491         case 17:
3492                 if (real_type == SOCK_DGRAM) {
3493                         break;
3494                 }
3495                 FALL_THROUGH;
3496         default:
3497                 errno = EPROTONOSUPPORT;
3498                 return -1;
3499         }
3500
3501         /*
3502          * We must call libc_socket with type, from the caller, not the version
3503          * we removed SOCK_CLOEXEC and SOCK_NONBLOCK from
3504          */
3505         fd = libc_socket(AF_UNIX, type, 0);
3506
3507         if (fd == -1) {
3508                 return -1;
3509         }
3510
3511         /* Check if we have a stale fd and remove it */
3512         swrap_remove_stale(fd);
3513
3514         si = &_si;
3515         si->family = family;
3516
3517         /* however, the rest of the socket_wrapper code expects just
3518          * the type, not the flags */
3519         si->type = real_type;
3520         si->protocol = protocol;
3521
3522         /*
3523          * Setup myname so getsockname() can succeed to find out the socket
3524          * type.
3525          */
3526         switch(si->family) {
3527         case AF_INET: {
3528                 struct sockaddr_in sin = {
3529                         .sin_family = AF_INET,
3530                 };
3531
3532                 si->myname.sa_socklen = sizeof(struct sockaddr_in);
3533                 memcpy(&si->myname.sa.in, &sin, si->myname.sa_socklen);
3534                 break;
3535         }
3536 #ifdef HAVE_IPV6
3537         case AF_INET6: {
3538                 struct sockaddr_in6 sin6 = {
3539                         .sin6_family = AF_INET6,
3540                 };
3541
3542                 si->myname.sa_socklen = sizeof(struct sockaddr_in6);
3543                 memcpy(&si->myname.sa.in6, &sin6, si->myname.sa_socklen);
3544                 break;
3545         }
3546 #endif
3547         default:
3548                 errno = EINVAL;
3549                 return -1;
3550         }
3551
3552         ret = swrap_create_socket(si, fd);
3553         if (ret == -1) {
3554                 int saved_errno = errno;
3555                 libc_close(fd);
3556                 errno = saved_errno;
3557                 return -1;
3558         }
3559
3560         SWRAP_LOG(SWRAP_LOG_TRACE,
3561                   "Created %s socket for protocol %s, fd=%d",
3562                   family == AF_INET ? "IPv4" : "IPv6",
3563                   real_type == SOCK_DGRAM ? "UDP" : "TCP",
3564                   fd);
3565
3566         return fd;
3567 }
3568
/* Exported socket() symbol; all work is done by swrap_socket(). */
int socket(int family, int type, int protocol)
{
	int fd = swrap_socket(family, type, protocol);

	return fd;
}
3573
3574 /****************************************************************************
3575  *   SOCKETPAIR
3576  ***************************************************************************/
3577
/*
 * Call libc's socketpair() and, on success, drop any stale socket_wrapper
 * bookkeeping for both returned descriptors.
 *
 * Returns the libc result unchanged.
 */
static int swrap_socketpair(int family, int type, int protocol, int sv[2])
{
	const int rc = libc_socketpair(family, type, protocol, sv);
	int n;

	if (rc == -1) {
		return rc;
	}

	for (n = 0; n < 2; n++) {
		swrap_remove_stale(sv[n]);
	}

	return rc;
}
3590
/* Exported socketpair() symbol; all work is done by swrap_socketpair(). */
int socketpair(int family, int type, int protocol, int sv[2])
{
	int rc = swrap_socketpair(family, type, protocol, sv);

	return rc;
}
3595
3596 /****************************************************************************
3597  *   TIMERFD_CREATE
3598  ***************************************************************************/
3599
3600 #ifdef HAVE_TIMERFD_CREATE
/*
 * Call libc's timerfd_create() and, on success, drop any stale
 * socket_wrapper bookkeeping for the returned descriptor.
 *
 * Returns the libc result unchanged.
 */
static int swrap_timerfd_create(int clockid, int flags)
{
	const int timer_fd = libc_timerfd_create(clockid, flags);

	if (timer_fd == -1) {
		return timer_fd;
	}

	swrap_remove_stale(timer_fd);

	return timer_fd;
}
3612
/* Exported timerfd_create() symbol; all work is done by swrap_timerfd_create(). */
int timerfd_create(int clockid, int flags)
{
	int fd = swrap_timerfd_create(clockid, flags);

	return fd;
}
3617 #endif
3618
3619 /****************************************************************************
3620  *   PIPE
3621  ***************************************************************************/
3622
/*
 * Call libc's pipe() and, on success, drop any stale socket_wrapper
 * bookkeeping for both ends of the new pipe.
 *
 * Returns the libc result unchanged.
 */
static int swrap_pipe(int pipefd[2])
{
	const int rc = libc_pipe(pipefd);
	int n;

	if (rc == -1) {
		return rc;
	}

	for (n = 0; n < 2; n++) {
		swrap_remove_stale(pipefd[n]);
	}

	return rc;
}
3635
/* Exported pipe() symbol; all work is done by swrap_pipe(). */
int pipe(int pipefd[2])
{
	int rc = swrap_pipe(pipefd);

	return rc;
}
3640
3641 /****************************************************************************
3642  *   ACCEPT
3643  ***************************************************************************/
3644
3645 static int swrap_accept(int s,
3646                         struct sockaddr *addr,
3647                         socklen_t *addrlen,
3648                         int flags)
3649 {
3650         struct socket_info *parent_si, *child_si;
3651         struct socket_info new_si = { 0 };
3652         int fd;
3653         int idx;
3654         struct swrap_address un_addr = {
3655                 .sa_socklen = sizeof(struct sockaddr_un),
3656         };
3657         struct swrap_address un_my_addr = {
3658                 .sa_socklen = sizeof(struct sockaddr_un),
3659         };
3660         struct swrap_address in_addr = {
3661                 .sa_socklen = sizeof(struct sockaddr_storage),
3662         };
3663         struct swrap_address in_my_addr = {
3664                 .sa_socklen = sizeof(struct sockaddr_storage),
3665         };
3666         int ret;
3667
3668         parent_si = find_socket_info(s);
3669         if (!parent_si) {
3670 #ifdef HAVE_ACCEPT4
3671                 return libc_accept4(s, addr, addrlen, flags);
3672 #else
3673                 UNUSED(flags);
3674                 return libc_accept(s, addr, addrlen);
3675 #endif
3676         }
3677
3678
3679         /*
3680          * prevent parent_si from being altered / closed
3681          * while we read it
3682          */
3683         SWRAP_LOCK_SI(parent_si);
3684
3685         /*
3686          * assume out sockaddr have the same size as the in parent
3687          * socket family
3688          */
3689         in_addr.sa_socklen = socket_length(parent_si->family);
3690         if (in_addr.sa_socklen <= 0) {
3691                 SWRAP_UNLOCK_SI(parent_si);
3692                 errno = EINVAL;
3693                 return -1;
3694         }
3695
3696         SWRAP_UNLOCK_SI(parent_si);
3697
3698 #ifdef HAVE_ACCEPT4
3699         ret = libc_accept4(s, &un_addr.sa.s, &un_addr.sa_socklen, flags);
3700 #else
3701         UNUSED(flags);
3702         ret = libc_accept(s, &un_addr.sa.s, &un_addr.sa_socklen);
3703 #endif
3704         if (ret == -1) {
3705                 int saved_errno = errno;
3706                 if (saved_errno == ENOTSOCK) {
3707                         /* Remove stale fds */
3708                         swrap_remove_stale(s);
3709                 }
3710                 errno = saved_errno;
3711                 return ret;
3712         }
3713
3714         fd = ret;
3715
3716         /* Check if we have a stale fd and remove it */
3717         swrap_remove_stale(fd);
3718
3719         if (un_addr.sa.un.sun_path[0] == '\0') {
3720                 /*
3721                  * FreeBSD seems to have a problem where
3722                  * accept4() on the unix socket doesn't
3723                  * ECONNABORTED for already disconnected connections.
3724                  *
3725                  * Let's try libc_getpeername() to get the peer address
3726                  * as a fallback, but it'll likely return ENOTCONN,
3727                  * which we have to map to ECONNABORTED.
3728                  */
3729                 un_addr.sa_socklen = sizeof(struct sockaddr_un),
3730                 ret = libc_getpeername(fd, &un_addr.sa.s, &un_addr.sa_socklen);
3731                 if (ret == -1) {
3732                         int saved_errno = errno;
3733                         libc_close(fd);
3734                         if (saved_errno == ENOTCONN) {
3735                                 /*
3736                                  * If the connection is already disconnected
3737                                  * we should return ECONNABORTED.
3738                                  */
3739                                 saved_errno = ECONNABORTED;
3740                         }
3741                         errno = saved_errno;
3742                         return ret;
3743                 }
3744         }
3745
3746         ret = libc_getsockname(fd,
3747                                &un_my_addr.sa.s,
3748                                &un_my_addr.sa_socklen);
3749         if (ret == -1) {
3750                 int saved_errno = errno;
3751                 libc_close(fd);
3752                 if (saved_errno == ENOTCONN) {
3753                         /*
3754                          * If the connection is already disconnected
3755                          * we should return ECONNABORTED.
3756                          */
3757                         saved_errno = ECONNABORTED;
3758                 }
3759                 errno = saved_errno;
3760                 return ret;
3761         }
3762
3763         SWRAP_LOCK_SI(parent_si);
3764
3765         ret = sockaddr_convert_from_un(parent_si,
3766                                        &un_addr.sa.un,
3767                                        un_addr.sa_socklen,
3768                                        parent_si->family,
3769                                        &in_addr.sa.s,
3770                                        &in_addr.sa_socklen);
3771         if (ret == -1) {
3772                 int saved_errno = errno;
3773                 SWRAP_UNLOCK_SI(parent_si);
3774                 libc_close(fd);
3775                 errno = saved_errno;
3776                 return ret;
3777         }
3778
3779         child_si = &new_si;
3780
3781         child_si->family = parent_si->family;
3782         child_si->type = parent_si->type;
3783         child_si->protocol = parent_si->protocol;
3784         child_si->bound = 1;
3785         child_si->is_server = 1;
3786         child_si->connected = 1;
3787
3788         SWRAP_UNLOCK_SI(parent_si);
3789
3790         child_si->peername = (struct swrap_address) {
3791                 .sa_socklen = in_addr.sa_socklen,
3792         };
3793         memcpy(&child_si->peername.sa.ss, &in_addr.sa.ss, in_addr.sa_socklen);
3794
3795         if (addr != NULL && addrlen != NULL) {
3796                 size_t copy_len = MIN(*addrlen, in_addr.sa_socklen);
3797                 if (copy_len > 0) {
3798                         memcpy(addr, &in_addr.sa.ss, copy_len);
3799                 }
3800                 *addrlen = in_addr.sa_socklen;
3801         }
3802
3803         ret = sockaddr_convert_from_un(child_si,
3804                                        &un_my_addr.sa.un,
3805                                        un_my_addr.sa_socklen,
3806                                        child_si->family,
3807                                        &in_my_addr.sa.s,
3808                                        &in_my_addr.sa_socklen);
3809         if (ret == -1) {
3810                 int saved_errno = errno;
3811                 libc_close(fd);
3812                 errno = saved_errno;
3813                 return ret;
3814         }
3815
3816         SWRAP_LOG(SWRAP_LOG_TRACE,
3817                   "accept() path=%s, fd=%d",
3818                   un_my_addr.sa.un.sun_path, s);
3819
3820         child_si->myname = (struct swrap_address) {
3821                 .sa_socklen = in_my_addr.sa_socklen,
3822         };
3823         memcpy(&child_si->myname.sa.ss, &in_my_addr.sa.ss, in_my_addr.sa_socklen);
3824
3825         idx = swrap_create_socket(&new_si, fd);
3826         if (idx == -1) {
3827                 int saved_errno = errno;
3828                 libc_close(fd);
3829                 errno = saved_errno;
3830                 return -1;
3831         }
3832
3833         if (addr != NULL) {
3834                 struct socket_info *si = swrap_get_socket_info(idx);
3835
3836                 SWRAP_LOCK_SI(si);
3837                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_SEND, NULL, 0);
3838                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_RECV, NULL, 0);
3839                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_ACK, NULL, 0);
3840                 SWRAP_UNLOCK_SI(si);
3841         }
3842
3843         return fd;
3844 }
3845
#ifdef HAVE_ACCEPT4
/*
 * Exported accept4() symbol: hands the call, including the caller's
 * flags, over to swrap_accept() and returns its result.
 */
int accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags)
{
	int new_fd;

	new_fd = swrap_accept(s, addr, addrlen, flags);

	return new_fd;
}
#endif /* HAVE_ACCEPT4 */
3852
/*
 * Exported accept() symbol. The prototype depends on whether the
 * platform declares the length argument as Psocklen_t; either way the
 * call is forwarded to swrap_accept() with flags set to 0.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int accept(int s, struct sockaddr *addr, Psocklen_t addrlen)
#else
int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_accept(s, addr, len_ptr, 0);
}
3861
3862 static int autobind_start_init;
3863 static int autobind_start;
3864
3865 /* using sendto() or connect() on an unbound socket would give the
3866    recipient no way to reply, as unlike UDP and TCP, a unix domain
3867    socket can't auto-assign ephemeral port numbers, so we need to
3868    assign it here.
3869    Note: this might change the family from ipv6 to ipv4
3870 */
3871 static int swrap_auto_bind(int fd, struct socket_info *si, int family)
3872 {
3873         struct swrap_address un_addr = {
3874                 .sa_socklen = sizeof(struct sockaddr_un),
3875         };
3876         int i;
3877         char type;
3878         int ret;
3879         int port;
3880         char *swrap_dir = NULL;
3881
3882         swrap_mutex_lock(&autobind_start_mutex);
3883
3884         if (autobind_start_init != 1) {
3885                 autobind_start_init = 1;
3886                 autobind_start = getpid();
3887                 autobind_start %= 50000;
3888                 autobind_start += 10000;
3889         }
3890
3891         un_addr.sa.un.sun_family = AF_UNIX;
3892
3893         switch (family) {
3894         case AF_INET: {
3895                 struct sockaddr_in in;
3896
3897                 switch (si->type) {
3898                 case SOCK_STREAM:
3899                         type = SOCKET_TYPE_CHAR_TCP;
3900                         break;
3901                 case SOCK_DGRAM:
3902                         type = SOCKET_TYPE_CHAR_UDP;
3903                         break;
3904                 default:
3905                         errno = ESOCKTNOSUPPORT;
3906                         ret = -1;
3907                         goto done;
3908                 }
3909
3910                 memset(&in, 0, sizeof(in));
3911                 in.sin_family = AF_INET;
3912                 in.sin_addr.s_addr = htonl(swrap_ipv4_iface(
3913                                            socket_wrapper_default_iface()));
3914
3915                 si->myname = (struct swrap_address) {
3916                         .sa_socklen = sizeof(in),
3917                 };
3918                 memcpy(&si->myname.sa.in, &in, si->myname.sa_socklen);
3919                 break;
3920         }
3921 #ifdef HAVE_IPV6
3922         case AF_INET6: {
3923                 struct sockaddr_in6 in6;
3924
3925                 if (si->family != family) {
3926                         errno = ENETUNREACH;
3927                         ret = -1;
3928                         goto done;
3929                 }
3930
3931                 switch (si->type) {
3932                 case SOCK_STREAM:
3933                         type = SOCKET_TYPE_CHAR_TCP_V6;
3934                         break;
3935                 case SOCK_DGRAM:
3936                         type = SOCKET_TYPE_CHAR_UDP_V6;
3937                         break;
3938                 default:
3939                         errno = ESOCKTNOSUPPORT;
3940                         ret = -1;
3941                         goto done;
3942                 }
3943
3944                 memset(&in6, 0, sizeof(in6));
3945                 in6.sin6_family = AF_INET6;
3946                 in6.sin6_addr = *swrap_ipv6();
3947                 in6.sin6_addr.s6_addr[15] = socket_wrapper_default_iface();
3948
3949                 si->myname = (struct swrap_address) {
3950                         .sa_socklen = sizeof(in6),
3951                 };
3952                 memcpy(&si->myname.sa.in6, &in6, si->myname.sa_socklen);
3953                 break;
3954         }
3955 #endif
3956         default:
3957                 errno = ESOCKTNOSUPPORT;
3958                 ret = -1;
3959                 goto done;
3960         }
3961
3962         if (autobind_start > 60000) {
3963                 autobind_start = 10000;
3964         }
3965
3966         swrap_dir = socket_wrapper_dir();
3967         if (swrap_dir == NULL) {
3968                 errno = EINVAL;
3969                 ret = -1;
3970                 goto done;
3971         }
3972
3973         for (i = 0; i < SOCKET_MAX_SOCKETS; i++) {
3974                 port = autobind_start + i;
3975                 swrap_un_path(&un_addr.sa.un,
3976                               swrap_dir,
3977                               type,
3978                               socket_wrapper_default_iface(),
3979                               port);
3980
3981                 ret = libc_bind(fd, &un_addr.sa.s, un_addr.sa_socklen);
3982                 if (ret == -1) {
3983                         if (errno == EALREADY || errno == EADDRINUSE) {
3984                                 continue;
3985                         }
3986                         goto done;
3987                 }
3988
3989                 si->un_addr = un_addr.sa.un;
3990
3991                 si->bound = 1;
3992                 autobind_start = port + 1;
3993                 break;
3994         }
3995         if (i == SOCKET_MAX_SOCKETS) {
3996                 SWRAP_LOG(SWRAP_LOG_ERROR, "Too many open unix sockets (%u) for "
3997                                            "interface "SOCKET_FORMAT,
3998                                            SOCKET_MAX_SOCKETS,
3999                                            type,
4000                                            socket_wrapper_default_iface(),
4001                                            0);
4002                 errno = ENFILE;
4003                 ret = -1;
4004                 goto done;
4005         }
4006
4007         si->family = family;
4008         set_port(si->family, port, &si->myname);
4009
4010         ret = 0;
4011
4012 done:
4013         SAFE_FREE(swrap_dir);
4014         swrap_mutex_unlock(&autobind_start_mutex);
4015         return ret;
4016 }
4017
4018 /****************************************************************************
4019  *   CONNECT
4020  ***************************************************************************/
4021
4022 static int swrap_connect(int s, const struct sockaddr *serv_addr,
4023                          socklen_t addrlen)
4024 {
4025         int ret;
4026         struct swrap_address un_addr = {
4027                 .sa_socklen = sizeof(struct sockaddr_un),
4028         };
4029         struct socket_info *si = find_socket_info(s);
4030         struct swrap_sockaddr_buf buf = {};
4031         int bcast = 0;
4032
4033         if (!si) {
4034                 return libc_connect(s, serv_addr, addrlen);
4035         }
4036
4037         SWRAP_LOCK_SI(si);
4038
4039         if (si->bound == 0) {
4040                 ret = swrap_auto_bind(s, si, serv_addr->sa_family);
4041                 if (ret == -1) {
4042                         goto done;
4043                 }
4044         }
4045
4046         if (si->family != serv_addr->sa_family) {
4047                 SWRAP_LOG(SWRAP_LOG_ERROR,
4048                           "called for fd=%d (family=%d) called with invalid family=%d",
4049                           s, si->family, serv_addr->sa_family);
4050                 errno = EINVAL;
4051                 ret = -1;
4052                 goto done;
4053         }
4054
4055         ret = sockaddr_convert_to_un(si, serv_addr,
4056                                      addrlen, &un_addr.sa.un, 0, &bcast);
4057         if (ret == -1) {
4058                 goto done;
4059         }
4060
4061         if (bcast) {
4062                 errno = ENETUNREACH;
4063                 ret = -1;
4064                 goto done;
4065         }
4066
4067         if (si->type == SOCK_DGRAM) {
4068                 si->defer_connect = 1;
4069                 ret = 0;
4070         } else {
4071                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_SEND, NULL, 0);
4072
4073                 ret = libc_connect(s,
4074                                    &un_addr.sa.s,
4075                                    un_addr.sa_socklen);
4076         }
4077
4078         SWRAP_LOG(SWRAP_LOG_TRACE,
4079                   "connect(%s) path=%s, fd=%d",
4080                   swrap_sockaddr_string(&buf, serv_addr),
4081                   un_addr.sa.un.sun_path, s);
4082
4083
4084         /* to give better errors */
4085         if (ret == -1 && errno == ENOENT) {
4086                 errno = EHOSTUNREACH;
4087         }
4088
4089         if (ret == 0) {
4090                 si->peername = (struct swrap_address) {
4091                         .sa_socklen = addrlen,
4092                 };
4093
4094                 memcpy(&si->peername.sa.ss, serv_addr, addrlen);
4095                 si->connected = 1;
4096
4097                 /*
4098                  * When we connect() on a socket than we have to bind the
4099                  * outgoing connection on the interface we use for the
4100                  * transport. We already bound it on the right interface
4101                  * but here we have to update the name so getsockname()
4102                  * returns correct information.
4103                  */
4104                 if (si->bindname.sa_socklen > 0) {
4105                         si->myname = (struct swrap_address) {
4106                                 .sa_socklen = si->bindname.sa_socklen,
4107                         };
4108
4109                         memcpy(&si->myname.sa.ss,
4110                                &si->bindname.sa.ss,
4111                                si->bindname.sa_socklen);
4112
4113                         /* Cleanup bindname */
4114                         si->bindname = (struct swrap_address) {
4115                                 .sa_socklen = 0,
4116                         };
4117                 }
4118
4119                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_RECV, NULL, 0);
4120                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_ACK, NULL, 0);
4121         } else {
4122                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_UNREACH, NULL, 0);
4123         }
4124
4125 done:
4126         SWRAP_UNLOCK_SI(si);
4127         return ret;
4128 }
4129
/* Exported connect() symbol; the actual work is done by swrap_connect(). */
int connect(int s, const struct sockaddr *serv_addr, socklen_t addrlen)
{
	int rc;

	rc = swrap_connect(s, serv_addr, addrlen);

	return rc;
}
4134
4135 /****************************************************************************
4136  *   BIND
4137  ***************************************************************************/
4138
4139 static int swrap_bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
4140 {
4141         int ret;
4142         struct swrap_address un_addr = {
4143                 .sa_socklen = sizeof(struct sockaddr_un),
4144         };
4145         struct socket_info *si = find_socket_info(s);
4146         struct swrap_sockaddr_buf buf = {};
4147         int ret_errno = errno;
4148         int bind_error = 0;
4149 #if 0 /* FIXME */
4150         bool in_use;
4151 #endif
4152
4153         if (!si) {
4154                 return libc_bind(s, myaddr, addrlen);
4155         }
4156
4157         SWRAP_LOCK_SI(si);
4158
4159         switch (si->family) {
4160         case AF_INET: {
4161                 const struct sockaddr_in *sin;
4162                 if (addrlen < sizeof(struct sockaddr_in)) {
4163                         bind_error = EINVAL;
4164                         break;
4165                 }
4166
4167                 sin = (const struct sockaddr_in *)(const void *)myaddr;
4168
4169                 if (sin->sin_family != AF_INET) {
4170                         bind_error = EAFNOSUPPORT;
4171                 }
4172
4173                 /* special case for AF_UNSPEC */
4174                 if (sin->sin_family == AF_UNSPEC &&
4175                     (sin->sin_addr.s_addr == htonl(INADDR_ANY)))
4176                 {
4177                         bind_error = 0;
4178                 }
4179
4180                 break;
4181         }
4182 #ifdef HAVE_IPV6
4183         case AF_INET6: {
4184                 const struct sockaddr_in6 *sin6;
4185                 if (addrlen < sizeof(struct sockaddr_in6)) {
4186                         bind_error = EINVAL;
4187                         break;
4188                 }
4189
4190                 sin6 = (const struct sockaddr_in6 *)(const void *)myaddr;
4191
4192                 if (sin6->sin6_family != AF_INET6) {
4193                         bind_error = EAFNOSUPPORT;
4194                 }
4195
4196                 break;
4197         }
4198 #endif
4199         default:
4200                 bind_error = EINVAL;
4201                 break;
4202         }
4203
4204         if (bind_error != 0) {
4205                 ret_errno = bind_error;
4206                 ret = -1;
4207                 goto out;
4208         }
4209
4210 #if 0 /* FIXME */
4211         in_use = check_addr_port_in_use(myaddr, addrlen);
4212         if (in_use) {
4213                 errno = EADDRINUSE;
4214                 ret = -1;
4215                 goto out;
4216         }
4217 #endif
4218
4219         si->myname.sa_socklen = addrlen;
4220         memcpy(&si->myname.sa.ss, myaddr, addrlen);
4221
4222         ret = sockaddr_convert_to_un(si,
4223                                      myaddr,
4224                                      addrlen,
4225                                      &un_addr.sa.un,
4226                                      1,
4227                                      &si->bcast);
4228         if (ret == -1) {
4229                 ret_errno = errno;
4230                 goto out;
4231         }
4232
4233         unlink(un_addr.sa.un.sun_path);
4234
4235         ret = libc_bind(s, &un_addr.sa.s, un_addr.sa_socklen);
4236         if (ret == -1) {
4237                 ret_errno = errno;
4238         }
4239
4240         SWRAP_LOG(SWRAP_LOG_TRACE,
4241                   "bind(%s) path=%s, fd=%d ret=%d ret_errno=%d",
4242                   swrap_sockaddr_string(&buf, myaddr),
4243                   un_addr.sa.un.sun_path, s, ret, ret_errno);
4244
4245         if (ret == 0) {
4246                 si->bound = 1;
4247         }
4248
4249 out:
4250         SWRAP_UNLOCK_SI(si);
4251         errno = ret_errno;
4252         return ret;
4253 }
4254
/* Exported bind() symbol; the actual work is done by swrap_bind(). */
int bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
{
	int rc;

	rc = swrap_bind(s, myaddr, addrlen);

	return rc;
}
4259
4260 /****************************************************************************
4261  *   BINDRESVPORT
4262  ***************************************************************************/
4263
4264 #ifdef HAVE_BINDRESVPORT
4265 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen);
4266
4267 static int swrap_bindresvport_sa(int sd, struct sockaddr *sa)
4268 {
4269         struct swrap_address myaddr = {
4270                 .sa_socklen = sizeof(struct sockaddr_storage),
4271         };
4272         socklen_t salen;
4273         static uint16_t port;
4274         uint16_t i;
4275         int rc = -1;
4276         int af;
4277
4278 #define SWRAP_STARTPORT 600
4279 #define SWRAP_ENDPORT (IPPORT_RESERVED - 1)
4280 #define SWRAP_NPORTS (SWRAP_ENDPORT - SWRAP_STARTPORT + 1)
4281
4282         if (port == 0) {
4283                 port = (getpid() % SWRAP_NPORTS) + SWRAP_STARTPORT;
4284         }
4285
4286         if (sa == NULL) {
4287                 salen = myaddr.sa_socklen;
4288                 sa = &myaddr.sa.s;
4289
4290                 rc = swrap_getsockname(sd, &myaddr.sa.s, &salen);
4291                 if (rc < 0) {
4292                         return -1;
4293                 }
4294
4295                 af = sa->sa_family;
4296                 memset(&myaddr.sa.ss, 0, salen);
4297         } else {
4298                 af = sa->sa_family;
4299         }
4300
4301         for (i = 0; i < SWRAP_NPORTS; i++, port++) {
4302                 switch(af) {
4303                 case AF_INET: {
4304                         struct sockaddr_in *sinp = (struct sockaddr_in *)(void *)sa;
4305
4306                         salen = sizeof(struct sockaddr_in);
4307                         sinp->sin_port = htons(port);
4308                         break;
4309                 }
4310                 case AF_INET6: {
4311                         struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)(void *)sa;
4312
4313                         salen = sizeof(struct sockaddr_in6);
4314                         sin6p->sin6_port = htons(port);
4315                         break;
4316                 }
4317                 default:
4318                         errno = EAFNOSUPPORT;
4319                         return -1;
4320                 }
4321                 sa->sa_family = af;
4322
4323                 if (port > SWRAP_ENDPORT) {
4324                         port = SWRAP_STARTPORT;
4325                 }
4326
4327                 rc = swrap_bind(sd, (struct sockaddr *)sa, salen);
4328                 if (rc == 0 || errno != EADDRINUSE) {
4329                         break;
4330                 }
4331         }
4332
4333         return rc;
4334 }
4335
/*
 * Exported bindresvport() symbol; passes the (possibly NULL) address on
 * to swrap_bindresvport_sa() as a generic sockaddr.
 */
int bindresvport(int sockfd, struct sockaddr_in *sinp)
{
	struct sockaddr *sa = (struct sockaddr *)sinp;

	return swrap_bindresvport_sa(sockfd, sa);
}
4340 #endif
4341
4342 /****************************************************************************
4343  *   LISTEN
4344  ***************************************************************************/
4345
4346 static int swrap_listen(int s, int backlog)
4347 {
4348         int ret;
4349         struct socket_info *si = find_socket_info(s);
4350
4351         if (!si) {
4352                 return libc_listen(s, backlog);
4353         }
4354
4355         SWRAP_LOCK_SI(si);
4356
4357         if (si->bound == 0) {
4358                 ret = swrap_auto_bind(s, si, si->family);
4359                 if (ret == -1) {
4360                         errno = EADDRINUSE;
4361                         goto out;
4362                 }
4363         }
4364
4365         ret = libc_listen(s, backlog);
4366         if (ret == 0) {
4367                 si->listening = 1;
4368         }
4369
4370 out:
4371         SWRAP_UNLOCK_SI(si);
4372
4373         return ret;
4374 }
4375
/* Exported listen() symbol; the actual work is done by swrap_listen(). */
int listen(int s, int backlog)
{
	int rc;

	rc = swrap_listen(s, backlog);

	return rc;
}
4380
4381 /****************************************************************************
4382  *   FOPEN
4383  ***************************************************************************/
4384
/*
 * Open a stream via libc_fopen() and, on success, let
 * swrap_remove_stale() check the descriptor behind the new stream.
 */
static FILE *swrap_fopen(const char *name, const char *mode)
{
	FILE *stream = libc_fopen(name, mode);

	if (stream == NULL) {
		return NULL;
	}

	swrap_remove_stale(fileno(stream));

	return stream;
}
4398
/* Exported fopen() symbol; the actual work is done by swrap_fopen(). */
FILE *fopen(const char *name, const char *mode)
{
	FILE *stream;

	stream = swrap_fopen(name, mode);

	return stream;
}
4403
4404 /****************************************************************************
4405  *   FOPEN64
4406  ***************************************************************************/
4407
4408 #ifdef HAVE_FOPEN64
/*
 * Open a stream via libc_fopen64() and, on success, let
 * swrap_remove_stale() check the descriptor behind the new stream.
 */
static FILE *swrap_fopen64(const char *name, const char *mode)
{
	FILE *stream = libc_fopen64(name, mode);

	if (stream == NULL) {
		return NULL;
	}

	swrap_remove_stale(fileno(stream));

	return stream;
}
4422
/* Exported fopen64() symbol; the actual work is done by swrap_fopen64(). */
FILE *fopen64(const char *name, const char *mode)
{
	FILE *stream;

	stream = swrap_fopen64(name, mode);

	return stream;
}
4427 #endif /* HAVE_FOPEN64 */
4428
4429 /****************************************************************************
4430  *   OPEN
4431  ***************************************************************************/
4432
/*
 * Open a file via libc_vopen(); ap carries the optional arguments of
 * the variadic open() call. Returns the new descriptor, or -1 on error.
 */
static int swrap_vopen(const char *pathname, int flags, va_list ap)
{
	int fd = libc_vopen(pathname, flags, ap);

	if (fd == -1) {
		return fd;
	}

	/*
	 * Descriptors can get closed behind our back (libc-internal
	 * code paths, direct syscalls) where we can't intercept it,
	 * so try to recover once we notice that a descriptor number
	 * got handed out again.
	 */
	swrap_remove_stale(fd);

	return fd;
}
4449
/*
 * Exported open() symbol: collects the variable arguments and passes
 * them on to swrap_vopen().
 */
int open(const char *pathname, int flags, ...)
{
	va_list args;
	int new_fd;

	va_start(args, flags);
	new_fd = swrap_vopen(pathname, flags, args);
	va_end(args);

	return new_fd;
}
4461
4462 /****************************************************************************
4463  *   OPEN64
4464  ***************************************************************************/
4465
4466 #ifdef HAVE_OPEN64
/*
 * Open a file via libc_vopen64(); ap carries the optional arguments of
 * the variadic open64() call. Returns the new descriptor, or -1 on
 * error.
 */
static int swrap_vopen64(const char *pathname, int flags, va_list ap)
{
	int fd = libc_vopen64(pathname, flags, ap);

	if (fd == -1) {
		return fd;
	}

	/*
	 * Descriptors can get closed behind our back (libc-internal
	 * code paths, direct syscalls) where we can't intercept it,
	 * so try to recover once we notice that a descriptor number
	 * got handed out again.
	 */
	swrap_remove_stale(fd);

	return fd;
}
4483
/*
 * Exported open64() symbol: collects the variable arguments and passes
 * them on to swrap_vopen64().
 */
int open64(const char *pathname, int flags, ...)
{
	va_list args;
	int new_fd;

	va_start(args, flags);
	new_fd = swrap_vopen64(pathname, flags, args);
	va_end(args);

	return new_fd;
}
4495 #endif /* HAVE_OPEN64 */
4496
4497 /****************************************************************************
4498  *   OPENAT
4499  ***************************************************************************/
4500
/*
 * Open a file relative to dirfd via libc_vopenat(); ap carries the
 * optional arguments of the variadic openat() call. Returns the new
 * descriptor, or -1 on error.
 */
static int swrap_vopenat(int dirfd, const char *path, int flags, va_list ap)
{
	int fd = libc_vopenat(dirfd, path, flags, ap);

	if (fd == -1) {
		return fd;
	}

	/*
	 * Descriptors can get closed behind our back (libc-internal
	 * code paths, direct syscalls) where we can't intercept it,
	 * so try to recover once we notice that a descriptor number
	 * got handed out again.
	 */
	swrap_remove_stale(fd);

	return fd;
}
4518
/*
 * Exported openat() symbol: collects the variable arguments and passes
 * them on to swrap_vopenat().
 */
int openat(int dirfd, const char *path, int flags, ...)
{
	va_list args;
	int new_fd;

	va_start(args, flags);
	new_fd = swrap_vopenat(dirfd, path, flags, args);
	va_end(args);

	return new_fd;
}
4530
4531 /****************************************************************************
4532  *   GETPEERNAME
4533  ***************************************************************************/
4534
4535 static int swrap_getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
4536 {
4537         struct socket_info *si = find_socket_info(s);
4538         socklen_t len;
4539         int ret = -1;
4540
4541         if (!si) {
4542                 return libc_getpeername(s, name, addrlen);
4543         }
4544
4545         SWRAP_LOCK_SI(si);
4546
4547         if (si->peername.sa_socklen == 0)
4548         {
4549                 errno = ENOTCONN;
4550                 goto out;
4551         }
4552
4553         len = MIN(*addrlen, si->peername.sa_socklen);
4554         if (len == 0) {
4555                 ret = 0;
4556                 goto out;
4557         }
4558
4559         memcpy(name, &si->peername.sa.ss, len);
4560         *addrlen = si->peername.sa_socklen;
4561
4562         ret = 0;
4563 out:
4564         SWRAP_UNLOCK_SI(si);
4565
4566         return ret;
4567 }
4568
/*
 * Exported getpeername() symbol. The prototype depends on whether the
 * platform declares the length argument as Psocklen_t; either way the
 * call is forwarded to swrap_getpeername().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getpeername(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_getpeername(s, name, len_ptr);
}
4577
4578 /****************************************************************************
4579  *   GETSOCKNAME
4580  ***************************************************************************/
4581
4582 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
4583 {
4584         struct socket_info *si = find_socket_info(s);
4585         socklen_t len;
4586         int ret = -1;
4587
4588         if (!si) {
4589                 return libc_getsockname(s, name, addrlen);
4590         }
4591
4592         SWRAP_LOCK_SI(si);
4593
4594         len = MIN(*addrlen, si->myname.sa_socklen);
4595         if (len == 0) {
4596                 ret = 0;
4597                 goto out;
4598         }
4599
4600         memcpy(name, &si->myname.sa.ss, len);
4601         *addrlen = si->myname.sa_socklen;
4602
4603         ret = 0;
4604 out:
4605         SWRAP_UNLOCK_SI(si);
4606
4607         return ret;
4608 }
4609
/*
 * Exported getsockname() symbol. The prototype depends on whether the
 * platform declares the length argument as Psocklen_t; either way the
 * call is forwarded to swrap_getsockname().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockname(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_getsockname(s, name, len_ptr);
}
4618
4619 /****************************************************************************
4620  *   GETSOCKOPT
4621  ***************************************************************************/
4622
/*
 * Solaris names this socket option SO_PROTOTYPE; provide SO_PROTOCOL
 * as an alias when only the Solaris name is available.
 */
#if !defined(SO_PROTOCOL) && defined(SO_PROTOTYPE)
# define SO_PROTOCOL SO_PROTOTYPE
#endif /* !SO_PROTOCOL && SO_PROTOTYPE */
4628
4629 static int swrap_getsockopt(int s, int level, int optname,
4630                             void *optval, socklen_t *optlen)
4631 {
4632         struct socket_info *si = find_socket_info(s);
4633         int ret;
4634
4635         if (!si) {
4636                 return libc_getsockopt(s,
4637                                        level,
4638                                        optname,
4639                                        optval,
4640                                        optlen);
4641         }
4642
4643         SWRAP_LOCK_SI(si);
4644
4645         if (level == SOL_SOCKET) {
4646                 switch (optname) {
4647 #ifdef SO_DOMAIN
4648                 case SO_DOMAIN:
4649                         if (optval == NULL || optlen == NULL ||
4650                             *optlen < (socklen_t)sizeof(int)) {
4651                                 errno = EINVAL;
4652                                 ret = -1;
4653                                 goto done;
4654                         }
4655
4656                         *optlen = sizeof(int);
4657                         *(int *)optval = si->family;
4658                         ret = 0;
4659                         goto done;
4660 #endif /* SO_DOMAIN */
4661
4662 #ifdef SO_PROTOCOL
4663                 case SO_PROTOCOL:
4664                         if (optval == NULL || optlen == NULL ||
4665                             *optlen < (socklen_t)sizeof(int)) {
4666                                 errno = EINVAL;
4667                                 ret = -1;
4668                                 goto done;
4669                         }
4670
4671                         *optlen = sizeof(int);
4672                         *(int *)optval = si->protocol;
4673                         ret = 0;
4674                         goto done;
4675 #endif /* SO_PROTOCOL */
4676                 case SO_TYPE:
4677                         if (optval == NULL || optlen == NULL ||
4678                             *optlen < (socklen_t)sizeof(int)) {
4679                                 errno = EINVAL;
4680                                 ret = -1;
4681                                 goto done;
4682                         }
4683
4684                         *optlen = sizeof(int);
4685                         *(int *)optval = si->type;
4686                         ret = 0;
4687                         goto done;
4688                 default:
4689                         ret = libc_getsockopt(s,
4690                                               level,
4691                                               optname,
4692                                               optval,
4693                                               optlen);
4694                         goto done;
4695                 }
4696         } else if (level == IPPROTO_TCP) {
4697                 switch (optname) {
4698 #ifdef TCP_NODELAY
4699                 case TCP_NODELAY:
4700                         /*
4701                          * This enables sending packets directly out over TCP.
4702                          * As a unix socket is doing that any way, report it as
4703                          * enabled.
4704                          */
4705                         if (optval == NULL || optlen == NULL ||
4706                             *optlen < (socklen_t)sizeof(int)) {
4707                                 errno = EINVAL;
4708                                 ret = -1;
4709                                 goto done;
4710                         }
4711
4712                         *optlen = sizeof(int);
4713                         *(int *)optval = si->tcp_nodelay;
4714
4715                         ret = 0;
4716                         goto done;
4717 #endif /* TCP_NODELAY */
4718 #ifdef TCP_INFO
4719                 case TCP_INFO: {
4720                         struct tcp_info info;
4721                         socklen_t ilen = sizeof(info);
4722
4723 #ifdef HAVE_NETINET_TCP_FSM_H
4724 /* This is FreeBSD */
4725 # define __TCP_LISTEN TCPS_LISTEN
4726 # define __TCP_ESTABLISHED TCPS_ESTABLISHED
4727 # define __TCP_CLOSE TCPS_CLOSED
4728 #else
4729 /* This is Linux */
4730 # define __TCP_LISTEN TCP_LISTEN
4731 # define __TCP_ESTABLISHED TCP_ESTABLISHED
4732 # define __TCP_CLOSE TCP_CLOSE
4733 #endif
4734
4735                         ZERO_STRUCT(info);
4736                         if (si->listening) {
4737                                 info.tcpi_state = __TCP_LISTEN;
4738                         } else if (si->connected) {
4739                                 /*
4740                                  * For now we just fake a few values
4741                                  * supported both by FreeBSD and Linux
4742                                  */
4743                                 info.tcpi_state = __TCP_ESTABLISHED;
4744                                 info.tcpi_rto = 200000;  /* 200 msec */
4745                                 info.tcpi_rtt = 5000;    /* 5 msec */
4746                                 info.tcpi_rttvar = 5000; /* 5 msec */
4747                         } else {
4748                                 info.tcpi_state = __TCP_CLOSE;
4749                                 info.tcpi_rto = 1000000;  /* 1 sec */
4750                                 info.tcpi_rtt = 0;
4751                                 info.tcpi_rttvar = 250000; /* 250 msec */
4752                         }
4753
4754                         if (optval == NULL || optlen == NULL ||
4755                             *optlen < (socklen_t)ilen) {
4756                                 errno = EINVAL;
4757                                 ret = -1;
4758                                 goto done;
4759                         }
4760
4761                         *optlen = ilen;
4762                         memcpy(optval, &info, ilen);
4763
4764                         ret = 0;
4765                         goto done;
4766                 }
4767 #endif /* TCP_INFO */
4768                 default:
4769                         break;
4770                 }
4771         }
4772
4773         errno = ENOPROTOOPT;
4774         ret = -1;
4775
4776 done:
4777         SWRAP_UNLOCK_SI(si);
4778         return ret;
4779 }
4780
/*
 * Public getsockopt() symbol; forwards to swrap_getsockopt(). The length
 * argument type depends on the platform prototype.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockopt(int s, int level, int optname, void *optval, Psocklen_t optlen)
#else
int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)optlen;

        return swrap_getsockopt(s, level, optname, optval, len_ptr);
}
4789
4790 /****************************************************************************
4791  *   SETSOCKOPT
4792  ***************************************************************************/
4793
4794 static int swrap_setsockopt(int s, int level, int optname,
4795                             const void *optval, socklen_t optlen)
4796 {
4797         struct socket_info *si = find_socket_info(s);
4798         int ret;
4799
4800         if (!si) {
4801                 return libc_setsockopt(s,
4802                                        level,
4803                                        optname,
4804                                        optval,
4805                                        optlen);
4806         }
4807
4808         if (level == SOL_SOCKET) {
4809                 return libc_setsockopt(s,
4810                                        level,
4811                                        optname,
4812                                        optval,
4813                                        optlen);
4814         }
4815
4816         SWRAP_LOCK_SI(si);
4817
4818         if (level == IPPROTO_TCP) {
4819                 switch (optname) {
4820 #ifdef TCP_NODELAY
4821                 case TCP_NODELAY: {
4822                         int i;
4823
4824                         /*
4825                          * This enables sending packets directly out over TCP.
4826                          * A unix socket is doing that any way.
4827                          */
4828                         if (optval == NULL || optlen == 0 ||
4829                             optlen < (socklen_t)sizeof(int)) {
4830                                 errno = EINVAL;
4831                                 ret = -1;
4832                                 goto done;
4833                         }
4834
4835                         i = *discard_const_p(int, optval);
4836                         if (i != 0 && i != 1) {
4837                                 errno = EINVAL;
4838                                 ret = -1;
4839                                 goto done;
4840                         }
4841                         si->tcp_nodelay = i;
4842
4843                         ret = 0;
4844                         goto done;
4845                 }
4846 #endif /* TCP_NODELAY */
4847                 default:
4848                         break;
4849                 }
4850         }
4851
4852         switch (si->family) {
4853         case AF_INET:
4854                 if (level == IPPROTO_IP) {
4855 #ifdef IP_PKTINFO
4856                         if (optname == IP_PKTINFO) {
4857                                 si->pktinfo = AF_INET;
4858                         }
4859 #endif /* IP_PKTINFO */
4860                 }
4861                 ret = 0;
4862                 goto done;
4863 #ifdef HAVE_IPV6
4864         case AF_INET6:
4865                 if (level == IPPROTO_IPV6) {
4866 #ifdef IPV6_RECVPKTINFO
4867                         if (optname == IPV6_RECVPKTINFO) {
4868                                 si->pktinfo = AF_INET6;
4869                         }
4870 #endif /* IPV6_PKTINFO */
4871                 }
4872                 ret = 0;
4873                 goto done;
4874 #endif
4875         default:
4876                 errno = ENOPROTOOPT;
4877                 ret = -1;
4878                 goto done;
4879         }
4880
4881 done:
4882         SWRAP_UNLOCK_SI(si);
4883         return ret;
4884 }
4885
/* Public setsockopt() symbol; forwards to swrap_setsockopt(). */
int setsockopt(int s, int level, int optname,
               const void *optval, socklen_t optlen)
{
        int rc = swrap_setsockopt(s, level, optname, optval, optlen);

        return rc;
}
4891
4892 /****************************************************************************
4893  *   IOCTL
4894  ***************************************************************************/
4895
4896 static int swrap_vioctl(int s, unsigned long int r, va_list va)
4897 {
4898         struct socket_info *si = find_socket_info(s);
4899         va_list ap;
4900         int *value_ptr = NULL;
4901         int rc;
4902
4903         if (!si) {
4904                 return libc_vioctl(s, r, va);
4905         }
4906
4907         SWRAP_LOCK_SI(si);
4908
4909         va_copy(ap, va);
4910
4911         rc = libc_vioctl(s, r, va);
4912
4913         switch (r) {
4914         case FIONREAD:
4915                 if (rc == 0) {
4916                         value_ptr = ((int *)va_arg(ap, int *));
4917                 }
4918
4919                 if (rc == -1 && errno != EAGAIN && errno != ENOBUFS) {
4920                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4921                 } else if (value_ptr != NULL && *value_ptr == 0) { /* END OF FILE */
4922                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4923                 }
4924                 break;
4925 #ifdef FIONWRITE
4926         case FIONWRITE:
4927                 /* this is FreeBSD */
4928                 FALL_THROUGH; /* to TIOCOUTQ */
4929 #endif /* FIONWRITE */
4930         case TIOCOUTQ: /* same as SIOCOUTQ on Linux */
4931                 /*
4932                  * This may return more bytes then the application
4933                  * sent into the socket, for tcp it should
4934                  * return the number of unacked bytes.
4935                  *
4936                  * On AF_UNIX, all bytes are immediately acked!
4937                  */
4938                 if (rc == 0) {
4939                         value_ptr = ((int *)va_arg(ap, int *));
4940                         *value_ptr = 0;
4941                 }
4942                 break;
4943         }
4944
4945         va_end(ap);
4946
4947         SWRAP_UNLOCK_SI(si);
4948         return rc;
4949 }
4950
/*
 * Public ioctl() symbol; packs the variadic arguments into a va_list and
 * forwards to swrap_vioctl(). The request type depends on the platform
 * prototype.
 */
#ifdef HAVE_IOCTL_INT
int ioctl(int s, int r, ...)
#else
int ioctl(int s, unsigned long int r, ...)
#endif
{
        va_list args;
        int result;

        va_start(args, r);
        result = swrap_vioctl(s, (unsigned long int) r, args);
        va_end(args);

        return result;
}
4968
4969 /*****************
4970  * CMSG
4971  *****************/
4972
4973 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
4974
/*
 * Fallback for platforms whose headers do not provide CMSG_ALIGN: use the
 * BSD _ALIGN macro when present, otherwise round the length up to a
 * multiple of sizeof(size_t).
 */
#ifndef CMSG_ALIGN
# ifdef _ALIGN /* BSD */
#define CMSG_ALIGN _ALIGN
# else
#define CMSG_ALIGN(len) (((len) + sizeof(size_t) - 1) & ~(sizeof(size_t) - 1))
# endif /* _ALIGN */
#endif /* CMSG_ALIGN */
4982
/**
 * @brief Add a cmsghdr to a msghdr.
 *
 * This is a function to add any type of cmsghdr. It will operate on the
 * msg->msg_control and msg->msg_controllen you pass in by adapting them to
 * the buffer position after the added cmsg element. Hence, this function is
 * intended to be used with an intermediate msghdr and not on the original
 * one handed in by the client.
 *
 * If the remaining control buffer is too small for the element, as much as
 * fits is copied and MSG_CTRUNC is set in msg->msg_flags. If there is no
 * control buffer at all (msg_control is NULL or msg_controllen is 0) only
 * MSG_CTRUNC is set and nothing else is touched.
 *
 * @param[in]  msg      The msghdr to which to add the cmsg.
 *
 * @param[in]  level    The cmsg level to set.
 *
 * @param[in]  type     The cmsg type to set.
 *
 * @param[in]  data     The cmsg data to set (may be NULL if len is 0).
 *
 * @param[in]  len      The length of the data to set.
 */
static void swrap_msghdr_add_cmsghdr(struct msghdr *msg,
                                     int level,
                                     int type,
                                     const void *data,
                                     size_t len)
{
        size_t cmlen = CMSG_LEN(len);
        size_t cmspace = CMSG_SPACE(len);
        uint8_t cmbuf[cmspace];
        struct cmsghdr hdr;
        uint8_t *p;

        /*
         * Without a destination buffer nothing can be stored. Bail out
         * before doing pointer arithmetic or memcpy() on a NULL pointer.
         */
        if (msg->msg_control == NULL || msg->msg_controllen == 0) {
                msg->msg_flags |= MSG_CTRUNC;
                return;
        }

        memset(cmbuf, 0, cmspace);

        if (msg->msg_controllen < cmlen) {
                cmlen = msg->msg_controllen;
                msg->msg_flags |= MSG_CTRUNC;
        }

        if (msg->msg_controllen < cmspace) {
                cmspace = msg->msg_controllen;
        }

        /*
         * We build the full element in an intermediate buffer first
         * in order to more easily cope with truncation.
         *
         * The header is assembled in a properly aligned local and copied
         * in, so the byte buffer is never accessed through a
         * struct cmsghdr pointer. CMSG_LEN(0) is the offset of the data
         * within an element.
         */
        memset(&hdr, 0, sizeof(hdr));
        hdr.cmsg_len = cmlen;
        hdr.cmsg_level = level;
        hdr.cmsg_type = type;
        memcpy(cmbuf, &hdr, sizeof(hdr));
        if (len > 0) {
                memcpy(cmbuf + CMSG_LEN(0), data, len);
        }

        /*
         * We now copy the possibly truncated buffer.
         * We copy cmlen bytes, but consume cmspace bytes,
         * leaving the possible padding uninitialized.
         */
        p = (uint8_t *)msg->msg_control;
        memcpy(p, cmbuf, cmlen);
        p += cmspace;
        msg->msg_control = p;
        msg->msg_controllen -= cmspace;

        return;
}
5048
5049 static int swrap_msghdr_add_pktinfo(struct socket_info *si,
5050                                     struct msghdr *msg)
5051 {
5052         /* Add packet info */
5053         switch (si->pktinfo) {
5054 #if defined(IP_PKTINFO) && (defined(HAVE_STRUCT_IN_PKTINFO) || defined(IP_RECVDSTADDR))
5055         case AF_INET: {
5056                 struct sockaddr_in *sin;
5057 #if defined(HAVE_STRUCT_IN_PKTINFO)
5058                 struct in_pktinfo pkt;
5059 #elif defined(IP_RECVDSTADDR)
5060                 struct in_addr pkt;
5061 #endif
5062
5063                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in)) {
5064                         sin = &si->bindname.sa.in;
5065                 } else {
5066                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in)) {
5067                                 return 0;
5068                         }
5069                         sin = &si->myname.sa.in;
5070                 }
5071
5072                 ZERO_STRUCT(pkt);
5073
5074 #if defined(HAVE_STRUCT_IN_PKTINFO)
5075                 pkt.ipi_ifindex = socket_wrapper_default_iface();
5076                 pkt.ipi_addr.s_addr = sin->sin_addr.s_addr;
5077 #elif defined(IP_RECVDSTADDR)
5078                 pkt = sin->sin_addr;
5079 #endif
5080
5081                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IP, IP_PKTINFO,
5082                                          &pkt, sizeof(pkt));
5083
5084                 break;
5085         }
5086 #endif /* IP_PKTINFO */
5087 #if defined(HAVE_IPV6)
5088         case AF_INET6: {
5089 #if defined(IPV6_PKTINFO) && defined(HAVE_STRUCT_IN6_PKTINFO)
5090                 struct sockaddr_in6 *sin6;
5091                 struct in6_pktinfo pkt6;
5092
5093                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in6)) {
5094                         sin6 = &si->bindname.sa.in6;
5095                 } else {
5096                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in6)) {
5097                                 return 0;
5098                         }
5099                         sin6 = &si->myname.sa.in6;
5100                 }
5101
5102                 ZERO_STRUCT(pkt6);
5103
5104                 pkt6.ipi6_ifindex = socket_wrapper_default_iface();
5105                 pkt6.ipi6_addr = sin6->sin6_addr;
5106
5107                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IPV6, IPV6_PKTINFO,
5108                                         &pkt6, sizeof(pkt6));
5109 #endif /* HAVE_STRUCT_IN6_PKTINFO */
5110
5111                 break;
5112         }
5113 #endif /* IPV6_PKTINFO */
5114         default:
5115                 return -1;
5116         }
5117
5118         return 0;
5119 }
5120
5121 static int swrap_msghdr_add_socket_info(struct socket_info *si,
5122                                         struct msghdr *omsg)
5123 {
5124         int rc = 0;
5125
5126         if (si->pktinfo > 0) {
5127                 rc = swrap_msghdr_add_pktinfo(si, omsg);
5128         }
5129
5130         return rc;
5131 }
5132
5133 static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
5134                                    uint8_t **cm_data,
5135                                    size_t *cm_data_space);
5136 static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
5137                                                 uint8_t **cm_data,
5138                                                 size_t *cm_data_space);
5139 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5140                                                 uint8_t **cm_data,
5141                                                 size_t *cm_data_space);
5142
5143 static int swrap_sendmsg_filter_cmsghdr(const struct msghdr *_msg,
5144                                         uint8_t **cm_data,
5145                                         size_t *cm_data_space)
5146 {
5147         struct msghdr *msg = discard_const_p(struct msghdr, _msg);
5148         struct cmsghdr *cmsg;
5149         int rc = -1;
5150
5151         /* Nothing to do */
5152         if (msg->msg_controllen == 0 || msg->msg_control == NULL) {
5153                 return 0;
5154         }
5155
5156         for (cmsg = CMSG_FIRSTHDR(msg);
5157              cmsg != NULL;
5158              cmsg = CMSG_NXTHDR(msg, cmsg)) {
5159                 switch (cmsg->cmsg_level) {
5160                 case IPPROTO_IP:
5161                         rc = swrap_sendmsg_filter_cmsg_ipproto_ip(cmsg,
5162                                                                   cm_data,
5163                                                                   cm_data_space);
5164                         break;
5165                 case SOL_SOCKET:
5166                         rc = swrap_sendmsg_filter_cmsg_sol_socket(cmsg,
5167                                                                   cm_data,
5168                                                                   cm_data_space);
5169                         break;
5170                 default:
5171                         rc = swrap_sendmsg_copy_cmsg(cmsg,
5172                                                      cm_data,
5173                                                      cm_data_space);
5174                         break;
5175                 }
5176                 if (rc < 0) {
5177                         int saved_errno = errno;
5178                         SAFE_FREE(*cm_data);
5179                         *cm_data_space = 0;
5180                         errno = saved_errno;
5181                         return rc;
5182                 }
5183         }
5184
5185         return rc;
5186 }
5187
/*
 * Append a copy of cmsg to the control data buffer *cm_data.
 *
 * The buffer is grown with realloc() by CMSG_ALIGN(cmsg->cmsg_len) bytes,
 * the message is copied to the old end of the buffer and the alignment
 * padding behind it is zeroed, so no uninitialized heap bytes end up in
 * the control data. *cm_data and *cm_data_space are updated on success and
 * left untouched on failure.
 *
 * Returns 0 on success, -1 if the allocation failed.
 */
static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
                                   uint8_t **cm_data,
                                   size_t *cm_data_space)
{
        size_t msg_len = cmsg->cmsg_len;
        size_t aligned_len = CMSG_ALIGN(msg_len);
        size_t cmspace = *cm_data_space + aligned_len;
        uint8_t *p;

        p = realloc((*cm_data), cmspace);
        if (p == NULL) {
                return -1;
        }
        (*cm_data) = p;

        p = (*cm_data) + (*cm_data_space);
        *cm_data_space = cmspace;

        memcpy(p, cmsg, msg_len);
        if (aligned_len > msg_len) {
                /* Clear the padding that is accounted for but not copied */
                memset(p + msg_len, 0, aligned_len - msg_len);
        }

        return 0;
}
5210
5211 static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
5212                                             uint8_t **cm_data,
5213                                             size_t *cm_data_space);
5214
5215
/*
 * Filter a control message of level IPPROTO_IP.
 *
 * IP_PKTINFO and IPV6_PKTINFO are passed to the pktinfo filter; every
 * other type is rejected with -1.
 */
static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
                                                uint8_t **cm_data,
                                                size_t *cm_data_space)
{
        switch (cmsg->cmsg_type) {
#ifdef IP_PKTINFO
        case IP_PKTINFO:
                return swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
                                                         cm_data,
                                                         cm_data_space);
#endif
#ifdef IPV6_PKTINFO
        case IPV6_PKTINFO:
                return swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
                                                         cm_data,
                                                         cm_data_space);
#endif
        default:
                break;
        }

        return -1;
}
5243
/*
 * Filter a pktinfo control message.
 *
 * Passing an IP pktinfo to a unix socket might be rejected by the
 * kernel, at least on FreeBSD. So the cmsg is skipped: nothing is
 * appended to the output and 0 is returned.
 */
static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
                                             uint8_t **cm_data,
                                             size_t *cm_data_space)
{
        /* All parameters are intentionally unused */
        (void)cm_data_space;
        (void)cm_data;
        (void)cmsg;

        return 0;
}
5258
5259 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5260                                                 uint8_t **cm_data,
5261                                                 size_t *cm_data_space)
5262 {
5263         int rc = -1;
5264
5265         switch (cmsg->cmsg_type) {
5266         case SCM_RIGHTS:
5267                 SWRAP_LOG(SWRAP_LOG_TRACE,
5268                           "Ignoring SCM_RIGHTS on inet socket!");
5269                 rc = 0;
5270                 break;
5271 #ifdef SCM_CREDENTIALS
5272         case SCM_CREDENTIALS:
5273                 SWRAP_LOG(SWRAP_LOG_TRACE,
5274                           "Ignoring SCM_CREDENTIALS on inet socket!");
5275                 rc = 0;
5276                 break;
5277 #endif /* SCM_CREDENTIALS */
5278         default:
5279                 rc = swrap_sendmsg_copy_cmsg(cmsg,
5280                                              cm_data,
5281                                              cm_data_space);
5282                 break;
5283         }
5284
5285         return rc;
5286 }
5287
5288 static const uint64_t swrap_unix_scm_right_magic = 0x8e0e13f27c42fc36;
5289
5290 /*
5291  * We only allow up to 6 fds at a time
5292  * as that's more than enough for Samba
5293  * and it means we can keep the logic simple
5294  * and work with fixed size arrays.
5295  *
5296  * We also keep sizeof(struct swrap_unix_scm_rights)
5297  * under PIPE_BUF (4096) in order to allow a non-blocking
5298  * write into the pipe.
5299  */
5300 #ifndef PIPE_BUF
5301 #define PIPE_BUF 4096
5302 #endif
/* Upper bound for the number of fds accepted in one SCM_RIGHTS message */
#define SWRAP_MAX_PASSED_FDS ((size_t)6)
/* One socket_info slot is reserved per possible fd */
#define SWRAP_MAX_PASSED_SOCKET_INFO SWRAP_MAX_PASSED_FDS
/*
 * Per-message description of the passed fds.
 */
struct swrap_unix_scm_rights_payload {
        /* Number of entries used in idxs[]; incremented once per passed fd */
        uint8_t num_idxs;
        /*
         * One entry per passed fd; -1 for an fd without a socket_info.
         * NOTE(review): other values presumably index infos[] - confirm
         * against the part of the sender not visible here.
         */
        int8_t idxs[SWRAP_MAX_PASSED_FDS];
        struct socket_info infos[SWRAP_MAX_PASSED_SOCKET_INFO];
};
/*
 * Envelope around the payload; the sender fills in a magic value, the
 * package name/version and the structure sizes.
 */
struct swrap_unix_scm_rights {
        /* Set to swrap_unix_scm_right_magic by the sender */
        uint64_t magic;
        /* Copies of SOCKET_WRAPPER_PACKAGE / SOCKET_WRAPPER_VERSION */
        char package_name[sizeof(SOCKET_WRAPPER_PACKAGE)];
        char package_version[sizeof(SOCKET_WRAPPER_VERSION)];
        /* sizeof(struct swrap_unix_scm_rights) as seen by the sender */
        uint32_t full_size;
        /* sizeof(payload) as seen by the sender */
        uint32_t payload_size;
        struct swrap_unix_scm_rights_payload payload;
};
5318
5319 static void swrap_dec_fd_passed_array(size_t num, struct socket_info **array)
5320 {
5321         int saved_errno = errno;
5322         size_t i;
5323
5324         for (i = 0; i < num; i++) {
5325                 struct socket_info *si = array[i];
5326                 if (si == NULL) {
5327                         continue;
5328                 }
5329
5330                 SWRAP_LOCK_SI(si);
5331                 swrap_dec_refcount(si);
5332                 if (si->fd_passed > 0) {
5333                         si->fd_passed -= 1;
5334                 }
5335                 SWRAP_UNLOCK_SI(si);
5336                 array[i] = NULL;
5337         }
5338
5339         errno = saved_errno;
5340 }
5341
5342 static void swrap_undo_si_idx_array(size_t num, int *array)
5343 {
5344         int saved_errno = errno;
5345         size_t i;
5346
5347         swrap_mutex_lock(&first_free_mutex);
5348
5349         for (i = 0; i < num; i++) {
5350                 struct socket_info *si = NULL;
5351
5352                 if (array[i] == -1) {
5353                         continue;
5354                 }
5355
5356                 si = swrap_get_socket_info(array[i]);
5357                 if (si == NULL) {
5358                         continue;
5359                 }
5360
5361                 SWRAP_LOCK_SI(si);
5362                 swrap_dec_refcount(si);
5363                 SWRAP_UNLOCK_SI(si);
5364
5365                 swrap_set_next_free(si, first_free);
5366                 first_free = array[i];
5367                 array[i] = -1;
5368         }
5369
5370         swrap_mutex_unlock(&first_free_mutex);
5371         errno = saved_errno;
5372 }
5373
/*
 * Close every descriptor in the array via libc_close(), skipping entries
 * that are -1. errno is preserved across the call.
 */
static void swrap_close_fd_array(size_t num, const int *array)
{
        const int errno_backup = errno;
        size_t pos = 0;

        while (pos < num) {
                const int fd = array[pos++];

                if (fd != -1) {
                        libc_close(fd);
                }
        }

        errno = errno_backup;
}
5388
/*
 * Helper union to view the byte pointer returned by CMSG_DATA() as an
 * array of file descriptors without a pointer cast.
 */
union __swrap_fds {
        const uint8_t *p;
        int *fds;
};

/*
 * Helper union to view a byte pointer into a control data buffer as a
 * struct cmsghdr pointer without a pointer cast.
 */
union __swrap_cmsghdr {
        const uint8_t *p;
        struct cmsghdr *cmsg;
};
5398
5399 static int swrap_sendmsg_unix_scm_rights(struct cmsghdr *cmsg,
5400                                          uint8_t **cm_data,
5401                                          size_t *cm_data_space,
5402                                          int *scm_rights_pipe_fd)
5403 {
5404         struct swrap_unix_scm_rights info;
5405         struct swrap_unix_scm_rights_payload *payload = NULL;
5406         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5407         struct socket_info *si_array[SWRAP_MAX_PASSED_FDS] = { NULL, };
5408         size_t info_idx = 0;
5409         size_t size_fds_in;
5410         size_t num_fds_in;
5411         union __swrap_fds __fds_in = { .p = NULL, };
5412         const int *fds_in = NULL;
5413         size_t num_fds_out;
5414         size_t size_fds_out;
5415         union __swrap_fds __fds_out = { .p = NULL, };
5416         int *fds_out = NULL;
5417         size_t cmsg_len;
5418         size_t cmsg_space;
5419         size_t new_cm_data_space;
5420         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5421         struct cmsghdr *new_cmsg = NULL;
5422         uint8_t *p = NULL;
5423         size_t i;
5424         int pipefd[2] = { -1, -1 };
5425         int rc;
5426         ssize_t sret;
5427
5428         /*
5429          * We pass this a buffer to the kernel make sure any padding
5430          * is also cleared.
5431          */
5432         ZERO_STRUCT(info);
5433         info.magic = swrap_unix_scm_right_magic;
5434         memcpy(info.package_name,
5435                SOCKET_WRAPPER_PACKAGE,
5436                sizeof(info.package_name));
5437         memcpy(info.package_version,
5438                SOCKET_WRAPPER_VERSION,
5439                sizeof(info.package_version));
5440         info.full_size = sizeof(info);
5441         info.payload_size = sizeof(info.payload);
5442         payload = &info.payload;
5443
5444         if (*scm_rights_pipe_fd != -1) {
5445                 SWRAP_LOG(SWRAP_LOG_ERROR,
5446                           "Two SCM_RIGHTS headers are not supported by socket_wrapper");
5447                 errno = EINVAL;
5448                 return -1;
5449         }
5450
5451         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5452                 SWRAP_LOG(SWRAP_LOG_ERROR,
5453                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5454                           (size_t)cmsg->cmsg_len,
5455                           CMSG_LEN(0));
5456                 errno = EINVAL;
5457                 return -1;
5458         }
5459         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5460         if ((size_fds_in % sizeof(int)) != 0) {
5461                 SWRAP_LOG(SWRAP_LOG_ERROR,
5462                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5463                           (size_t)cmsg->cmsg_len,
5464                           size_fds_in,
5465                           sizeof(int));
5466                 errno = EINVAL;
5467                 return -1;
5468         }
5469         num_fds_in = size_fds_in / sizeof(int);
5470         if (num_fds_in > SWRAP_MAX_PASSED_FDS) {
5471                 SWRAP_LOG(SWRAP_LOG_ERROR,
5472                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5473                           "num_fds_in=%zu > "
5474                           "SWRAP_MAX_PASSED_FDS(%zu)",
5475                           (size_t)cmsg->cmsg_len,
5476                           size_fds_in,
5477                           num_fds_in,
5478                           SWRAP_MAX_PASSED_FDS);
5479                 errno = EINVAL;
5480                 return -1;
5481         }
5482         if (num_fds_in == 0) {
5483                 SWRAP_LOG(SWRAP_LOG_ERROR,
5484                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5485                           "num_fds_in=%zu",
5486                           (size_t)cmsg->cmsg_len,
5487                           size_fds_in,
5488                           num_fds_in);
5489                 errno = EINVAL;
5490                 return -1;
5491         }
5492         __fds_in.p = CMSG_DATA(cmsg);
5493         fds_in = __fds_in.fds;
5494         num_fds_out = num_fds_in + 1;
5495
5496         SWRAP_LOG(SWRAP_LOG_TRACE,
5497                   "num_fds_in=%zu num_fds_out=%zu",
5498                   num_fds_in, num_fds_out);
5499
5500         size_fds_out = sizeof(int) * num_fds_out;
5501         cmsg_len = CMSG_LEN(size_fds_out);
5502         cmsg_space = CMSG_SPACE(size_fds_out);
5503
5504         new_cm_data_space = *cm_data_space + cmsg_space;
5505
5506         p = realloc((*cm_data), new_cm_data_space);
5507         if (p == NULL) {
5508                 return -1;
5509         }
5510         (*cm_data) = p;
5511         p = (*cm_data) + (*cm_data_space);
5512         memset(p, 0, cmsg_space);
5513         __new_cmsg.p = p;
5514         new_cmsg = __new_cmsg.cmsg;
5515         *new_cmsg = *cmsg;
5516         __fds_out.p = CMSG_DATA(new_cmsg);
5517         fds_out = __fds_out.fds;
5518         memcpy(fds_out, fds_in, size_fds_in);
5519         new_cmsg->cmsg_len = cmsg->cmsg_len;
5520
5521         for (i = 0; i < num_fds_in; i++) {
5522                 size_t j;
5523
5524                 payload->idxs[i] = -1;
5525                 payload->num_idxs++;
5526
5527                 si_idx_array[i] = find_socket_info_index(fds_in[i]);
5528                 if (si_idx_array[i] == -1) {
5529                         continue;
5530                 }
5531
5532                 si_array[i] = swrap_get_socket_info(si_idx_array[i]);
5533                 if (si_array[i] == NULL) {
5534                         SWRAP_LOG(SWRAP_LOG_ERROR,
5535                                   "fds_in[%zu]=%d si_idx_array[%zu]=%d missing!",
5536                                   i, fds_in[i], i, si_idx_array[i]);
5537                         errno = EINVAL;
5538                         return -1;
5539                 }
5540
5541                 for (j = 0; j < i; j++) {
5542                         if (si_array[j] == si_array[i]) {
5543                                 payload->idxs[i] = payload->idxs[j];
5544                                 break;
5545                         }
5546                 }
5547                 if (payload->idxs[i] == -1) {
5548                         if (info_idx >= SWRAP_MAX_PASSED_SOCKET_INFO) {
5549                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5550                                           "fds_in[%zu]=%d,si_idx_array[%zu]=%d: "
5551                                           "info_idx=%zu >= SWRAP_MAX_PASSED_FDS(%zu)!",
5552                                           i, fds_in[i], i, si_idx_array[i],
5553                                           info_idx,
5554                                           SWRAP_MAX_PASSED_SOCKET_INFO);
5555                                 errno = EINVAL;
5556                                 return -1;
5557                         }
5558                         payload->idxs[i] = info_idx;
5559                         info_idx += 1;
5560                         continue;
5561                 }
5562         }
5563
5564         for (i = 0; i < num_fds_in; i++) {
5565                 struct socket_info *si = si_array[i];
5566
5567                 if (si == NULL) {
5568                         SWRAP_LOG(SWRAP_LOG_TRACE,
5569                                   "fds_in[%zu]=%d not an inet socket",
5570                                   i, fds_in[i]);
5571                         continue;
5572                 }
5573
5574                 SWRAP_LOG(SWRAP_LOG_TRACE,
5575                           "fds_in[%zu]=%d si_idx_array[%zu]=%d "
5576                           "passing as info.idxs[%zu]=%d!",
5577                           i, fds_in[i],
5578                           i, si_idx_array[i],
5579                           i, payload->idxs[i]);
5580
5581                 SWRAP_LOCK_SI(si);
5582                 si->fd_passed += 1;
5583                 payload->infos[payload->idxs[i]] = *si;
5584                 payload->infos[payload->idxs[i]].fd_passed = 0;
5585                 SWRAP_UNLOCK_SI(si);
5586         }
5587
5588         rc = pipe(pipefd);
5589         if (rc == -1) {
5590                 int saved_errno = errno;
5591                 SWRAP_LOG(SWRAP_LOG_ERROR,
5592                           "pipe() failed - %d %s",
5593                           saved_errno,
5594                           strerror(saved_errno));
5595                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5596                 errno = saved_errno;
5597                 return -1;
5598         }
5599
5600         sret = libc_write(pipefd[1], &info, sizeof(info));
5601         if (sret != sizeof(info)) {
5602                 int saved_errno = errno;
5603                 if (sret != -1) {
5604                         saved_errno = EINVAL;
5605                 }
5606                 SWRAP_LOG(SWRAP_LOG_ERROR,
5607                           "write() failed - sret=%zd - %d %s",
5608                           sret, saved_errno,
5609                           strerror(saved_errno));
5610                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5611                 libc_close(pipefd[1]);
5612                 libc_close(pipefd[0]);
5613                 errno = saved_errno;
5614                 return -1;
5615         }
5616         libc_close(pipefd[1]);
5617
5618         /*
5619          * Add the pipe read end to the end of the passed fd array
5620          */
5621         fds_out[num_fds_in] = pipefd[0];
5622         new_cmsg->cmsg_len = cmsg_len;
5623
5624         /* we're done ... */
5625         *scm_rights_pipe_fd = pipefd[0];
5626         *cm_data_space = new_cm_data_space;
5627
5628         return 0;
5629 }
5630
/*
 * Handle one SOL_SOCKET level control message on the unix sendmsg path.
 *
 * SCM_RIGHTS messages are passed to swrap_sendmsg_unix_scm_rights(), which
 * on success reports the pipe fd it created through scm_rights_pipe_fd.
 * Every other message type is passed to swrap_sendmsg_copy_cmsg().
 *
 * Returns whatever the selected helper returned.
 */
static int swrap_sendmsg_unix_sol_socket(struct cmsghdr *cmsg,
                                         uint8_t **cm_data,
                                         size_t *cm_data_space,
                                         int *scm_rights_pipe_fd)
{
        if (cmsg->cmsg_type == SCM_RIGHTS) {
                return swrap_sendmsg_unix_scm_rights(cmsg,
                                                     cm_data,
                                                     cm_data_space,
                                                     scm_rights_pipe_fd);
        }

        return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5654
5655 static int swrap_recvmsg_unix_scm_rights(struct cmsghdr *cmsg,
5656                                          uint8_t **cm_data,
5657                                          size_t *cm_data_space)
5658 {
5659         int scm_rights_pipe_fd = -1;
5660         struct swrap_unix_scm_rights info;
5661         struct swrap_unix_scm_rights_payload *payload = NULL;
5662         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5663         size_t size_fds_in;
5664         size_t num_fds_in;
5665         union __swrap_fds __fds_in = { .p = NULL, };
5666         const int *fds_in = NULL;
5667         size_t num_fds_out;
5668         size_t size_fds_out;
5669         union __swrap_fds __fds_out = { .p = NULL, };
5670         int *fds_out = NULL;
5671         size_t cmsg_len;
5672         size_t cmsg_space;
5673         size_t new_cm_data_space;
5674         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5675         struct cmsghdr *new_cmsg = NULL;
5676         uint8_t *p = NULL;
5677         ssize_t sret;
5678         size_t i;
5679         int cmp;
5680
5681         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5682                 SWRAP_LOG(SWRAP_LOG_ERROR,
5683                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5684                           (size_t)cmsg->cmsg_len,
5685                           CMSG_LEN(0));
5686                 errno = EINVAL;
5687                 return -1;
5688         }
5689         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5690         if ((size_fds_in % sizeof(int)) != 0) {
5691                 SWRAP_LOG(SWRAP_LOG_ERROR,
5692                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5693                           (size_t)cmsg->cmsg_len,
5694                           size_fds_in,
5695                           sizeof(int));
5696                 errno = EINVAL;
5697                 return -1;
5698         }
5699         num_fds_in = size_fds_in / sizeof(int);
5700         if (num_fds_in > (SWRAP_MAX_PASSED_FDS + 1)) {
5701                 SWRAP_LOG(SWRAP_LOG_ERROR,
5702                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5703                           "num_fds_in=%zu > SWRAP_MAX_PASSED_FDS+1(%zu)",
5704                           (size_t)cmsg->cmsg_len,
5705                           size_fds_in,
5706                           num_fds_in,
5707                           SWRAP_MAX_PASSED_FDS+1);
5708                 errno = EINVAL;
5709                 return -1;
5710         }
5711         if (num_fds_in <= 1) {
5712                 SWRAP_LOG(SWRAP_LOG_ERROR,
5713                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5714                           "num_fds_in=%zu",
5715                           (size_t)cmsg->cmsg_len,
5716                           size_fds_in,
5717                           num_fds_in);
5718                 errno = EINVAL;
5719                 return -1;
5720         }
5721         __fds_in.p = CMSG_DATA(cmsg);
5722         fds_in = __fds_in.fds;
5723         num_fds_out = num_fds_in - 1;
5724
5725         SWRAP_LOG(SWRAP_LOG_TRACE,
5726                   "num_fds_in=%zu num_fds_out=%zu",
5727                   num_fds_in, num_fds_out);
5728
5729         for (i = 0; i < num_fds_in; i++) {
5730                 /* Check if we have a stale fd and remove it */
5731                 swrap_remove_stale(fds_in[i]);
5732         }
5733
5734         scm_rights_pipe_fd = fds_in[num_fds_out];
5735         size_fds_out = sizeof(int) * num_fds_out;
5736         cmsg_len = CMSG_LEN(size_fds_out);
5737         cmsg_space = CMSG_SPACE(size_fds_out);
5738
5739         new_cm_data_space = *cm_data_space + cmsg_space;
5740
5741         p = realloc((*cm_data), new_cm_data_space);
5742         if (p == NULL) {
5743                 swrap_close_fd_array(num_fds_in, fds_in);
5744                 return -1;
5745         }
5746         (*cm_data) = p;
5747         p = (*cm_data) + (*cm_data_space);
5748         memset(p, 0, cmsg_space);
5749         __new_cmsg.p = p;
5750         new_cmsg = __new_cmsg.cmsg;
5751         *new_cmsg = *cmsg;
5752         __fds_out.p = CMSG_DATA(new_cmsg);
5753         fds_out = __fds_out.fds;
5754         memcpy(fds_out, fds_in, size_fds_out);
5755         new_cmsg->cmsg_len = cmsg_len;
5756
5757         sret = read(scm_rights_pipe_fd, &info, sizeof(info));
5758         if (sret != sizeof(info)) {
5759                 int saved_errno = errno;
5760                 if (sret != -1) {
5761                         saved_errno = EINVAL;
5762                 }
5763                 SWRAP_LOG(SWRAP_LOG_ERROR,
5764                           "read() failed - sret=%zd - %d %s",
5765                           sret, saved_errno,
5766                           strerror(saved_errno));
5767                 swrap_close_fd_array(num_fds_in, fds_in);
5768                 errno = saved_errno;
5769                 return -1;
5770         }
5771         libc_close(scm_rights_pipe_fd);
5772         payload = &info.payload;
5773
5774         if (info.magic != swrap_unix_scm_right_magic) {
5775                 SWRAP_LOG(SWRAP_LOG_ERROR,
5776                           "info.magic=0x%llx != swrap_unix_scm_right_magic=0x%llx",
5777                           (unsigned long long)info.magic,
5778                           (unsigned long long)swrap_unix_scm_right_magic);
5779                 swrap_close_fd_array(num_fds_out, fds_out);
5780                 errno = EINVAL;
5781                 return -1;
5782         }
5783
5784         cmp = memcmp(info.package_name,
5785                      SOCKET_WRAPPER_PACKAGE,
5786                      sizeof(info.package_name));
5787         if (cmp != 0) {
5788                 SWRAP_LOG(SWRAP_LOG_ERROR,
5789                           "info.package_name='%.*s' != '%s'",
5790                           (int)sizeof(info.package_name),
5791                           info.package_name,
5792                           SOCKET_WRAPPER_PACKAGE);
5793                 swrap_close_fd_array(num_fds_out, fds_out);
5794                 errno = EINVAL;
5795                 return -1;
5796         }
5797
5798         cmp = memcmp(info.package_version,
5799                      SOCKET_WRAPPER_VERSION,
5800                      sizeof(info.package_version));
5801         if (cmp != 0) {
5802                 SWRAP_LOG(SWRAP_LOG_ERROR,
5803                           "info.package_version='%.*s' != '%s'",
5804                           (int)sizeof(info.package_version),
5805                           info.package_version,
5806                           SOCKET_WRAPPER_VERSION);
5807                 swrap_close_fd_array(num_fds_out, fds_out);
5808                 errno = EINVAL;
5809                 return -1;
5810         }
5811
5812         if (info.full_size != sizeof(info)) {
5813                 SWRAP_LOG(SWRAP_LOG_ERROR,
5814                           "info.full_size=%zu != sizeof(info)=%zu",
5815                           (size_t)info.full_size,
5816                           sizeof(info));
5817                 swrap_close_fd_array(num_fds_out, fds_out);
5818                 errno = EINVAL;
5819                 return -1;
5820         }
5821
5822         if (info.payload_size != sizeof(info.payload)) {
5823                 SWRAP_LOG(SWRAP_LOG_ERROR,
5824                           "info.payload_size=%zu != sizeof(info.payload)=%zu",
5825                           (size_t)info.payload_size,
5826                           sizeof(info.payload));
5827                 swrap_close_fd_array(num_fds_out, fds_out);
5828                 errno = EINVAL;
5829                 return -1;
5830         }
5831
5832         if (payload->num_idxs != num_fds_out) {
5833                 SWRAP_LOG(SWRAP_LOG_ERROR,
5834                           "info.num_idxs=%u != num_fds_out=%zu",
5835                           payload->num_idxs, num_fds_out);
5836                 swrap_close_fd_array(num_fds_out, fds_out);
5837                 errno = EINVAL;
5838                 return -1;
5839         }
5840
5841         for (i = 0; i < num_fds_out; i++) {
5842                 size_t j;
5843
5844                 si_idx_array[i] = -1;
5845
5846                 if (payload->idxs[i] == -1) {
5847                         SWRAP_LOG(SWRAP_LOG_TRACE,
5848                                   "fds_out[%zu]=%d not an inet socket",
5849                                   i, fds_out[i]);
5850                         continue;
5851                 }
5852
5853                 if (payload->idxs[i] < 0) {
5854                         SWRAP_LOG(SWRAP_LOG_ERROR,
5855                                   "fds_out[%zu]=%d info.idxs[%zu]=%d < 0!",
5856                                   i, fds_out[i], i, payload->idxs[i]);
5857                         swrap_close_fd_array(num_fds_out, fds_out);
5858                         errno = EINVAL;
5859                         return -1;
5860                 }
5861
5862                 if (payload->idxs[i] >= payload->num_idxs) {
5863                         SWRAP_LOG(SWRAP_LOG_ERROR,
5864                                   "fds_out[%zu]=%d info.idxs[%zu]=%d >= %u!",
5865                                   i, fds_out[i], i, payload->idxs[i],
5866                                   payload->num_idxs);
5867                         swrap_close_fd_array(num_fds_out, fds_out);
5868                         errno = EINVAL;
5869                         return -1;
5870                 }
5871
5872                 if ((size_t)fds_out[i] >= socket_fds_max) {
5873                         SWRAP_LOG(SWRAP_LOG_ERROR,
5874                                   "The max socket index limit of %zu has been reached, "
5875                                   "trying to add %d",
5876                                   socket_fds_max,
5877                                   fds_out[i]);
5878                         swrap_close_fd_array(num_fds_out, fds_out);
5879                         errno = EMFILE;
5880                         return -1;
5881                 }
5882
5883                 SWRAP_LOG(SWRAP_LOG_TRACE,
5884                           "fds_in[%zu]=%d "
5885                           "received as info.idxs[%zu]=%d!",
5886                           i, fds_out[i],
5887                           i, payload->idxs[i]);
5888
5889                 for (j = 0; j < i; j++) {
5890                         if (payload->idxs[j] == -1) {
5891                                 continue;
5892                         }
5893                         if (payload->idxs[j] == payload->idxs[i]) {
5894                                 si_idx_array[i] = si_idx_array[j];
5895                         }
5896                 }
5897                 if (si_idx_array[i] == -1) {
5898                         const struct socket_info *si = &payload->infos[payload->idxs[i]];
5899
5900                         si_idx_array[i] = swrap_add_socket_info(si);
5901                         if (si_idx_array[i] == -1) {
5902                                 int saved_errno = errno;
5903                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5904                                           "The max socket index limit of %zu has been reached, "
5905                                           "trying to add %d",
5906                                           socket_fds_max,
5907                                           fds_out[i]);
5908                                 swrap_undo_si_idx_array(i, si_idx_array);
5909                                 swrap_close_fd_array(num_fds_out, fds_out);
5910                                 errno = saved_errno;
5911                                 return -1;
5912                         }
5913                         SWRAP_LOG(SWRAP_LOG_TRACE,
5914                                   "Imported %s socket for protocol %s, fd=%d",
5915                                   si->family == AF_INET ? "IPv4" : "IPv6",
5916                                   si->type == SOCK_DGRAM ? "UDP" : "TCP",
5917                                   fds_out[i]);
5918                 }
5919         }
5920
5921         for (i = 0; i < num_fds_out; i++) {
5922                 if (si_idx_array[i] == -1) {
5923                         continue;
5924                 }
5925                 set_socket_info_index(fds_out[i], si_idx_array[i]);
5926         }
5927
5928         /* we're done ... */
5929         *cm_data_space = new_cm_data_space;
5930
5931         return 0;
5932 }
5933
/*
 * Handle one SOL_SOCKET level control message on the unix recvmsg path.
 *
 * SCM_RIGHTS messages are passed to swrap_recvmsg_unix_scm_rights(); every
 * other message type is passed to swrap_sendmsg_copy_cmsg().
 *
 * Returns whatever the selected helper returned.
 */
static int swrap_recvmsg_unix_sol_socket(struct cmsghdr *cmsg,
                                         uint8_t **cm_data,
                                         size_t *cm_data_space)
{
        if (cmsg->cmsg_type == SCM_RIGHTS) {
                return swrap_recvmsg_unix_scm_rights(cmsg,
                                                     cm_data,
                                                     cm_data_space);
        }

        return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5955
5956 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
5957
/*
 * Prepare the control data of a message that is about to be sent over a
 * unix socket.
 *
 * *msg_tmp becomes a copy of *_msg_in. If the message carries control
 * data, every control message is run through the matching helper and
 * msg_tmp->msg_control / msg_controllen are pointed at the newly built
 * buffer.
 *
 * _msg_in:            the message passed in by the caller (not modified)
 * msg_tmp:            the message that should be handed to sendmsg()
 * scm_rights_pipe_fd: set to -1, or to the pipe fd created while handling
 *                     an SCM_RIGHTS message
 *
 * Returns 0 on success. On failure the negative helper result is returned
 * with errno preserved, the temporary control buffer is freed and
 * *scm_rights_pipe_fd is -1 again.
 */
static int swrap_sendmsg_before_unix(const struct msghdr *_msg_in,
                                     struct msghdr *msg_tmp,
                                     int *scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        struct msghdr *msg_in = discard_const_p(struct msghdr, _msg_in);
        struct cmsghdr *cmsg = NULL;
        uint8_t *cm_data = NULL;
        size_t cm_data_space = 0;
        int rc = -1;

        *msg_tmp = *msg_in;
        *scm_rights_pipe_fd = -1;

        /* Nothing to do */
        if (msg_in->msg_controllen == 0 || msg_in->msg_control == NULL) {
                return 0;
        }

        for (cmsg = CMSG_FIRSTHDR(msg_in);
             cmsg != NULL;
             cmsg = CMSG_NXTHDR(msg_in, cmsg)) {
                switch (cmsg->cmsg_level) {
                case SOL_SOCKET:
                        rc = swrap_sendmsg_unix_sol_socket(cmsg,
                                                           &cm_data,
                                                           &cm_data_space,
                                                           scm_rights_pipe_fd);
                        break;

                default:
                        rc = swrap_sendmsg_copy_cmsg(cmsg,
                                                     &cm_data,
                                                     &cm_data_space);
                        break;
                }
                if (rc < 0) {
                        int saved_errno = errno;
                        SAFE_FREE(cm_data);
                        /*
                         * An earlier control message may already have
                         * created the pipe. Nothing will be sent, so do
                         * not leave its read end open.
                         */
                        if (*scm_rights_pipe_fd != -1) {
                                libc_close(*scm_rights_pipe_fd);
                                *scm_rights_pipe_fd = -1;
                        }
                        errno = saved_errno;
                        return rc;
                }
        }

        msg_tmp->msg_controllen = cm_data_space;
        msg_tmp->msg_control = cm_data;

        return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        *msg_tmp = *_msg_in;
        /* Keep the out parameter defined on this build as well. */
        *scm_rights_pipe_fd = -1;
        return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6011
/*
 * Clean up after a send on a unix socket.
 *
 * When control messages are supported, msg_tmp->msg_control is freed and
 * scm_rights_pipe_fd is closed unless it is -1; errno is left as it was on
 * entry.
 *
 * Returns ret unchanged.
 */
static ssize_t swrap_sendmsg_after_unix(struct msghdr *msg_tmp,
                                        ssize_t ret,
                                        int scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        const int errno_on_entry = errno;
        const int have_pipe_fd = (scm_rights_pipe_fd != -1);

        SAFE_FREE(msg_tmp->msg_control);

        if (have_pipe_fd) {
                libc_close(scm_rights_pipe_fd);
        }

        /* The cleanup must not hide the errno of the send itself. */
        errno = errno_on_entry;
#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        return ret;
}
6026
/*
 * Prepare a message for receiving on a unix socket.
 *
 * *msg_tmp becomes a copy of *msg_in. If the caller supplied a control
 * buffer, a zeroed temporary buffer is allocated for msg_tmp instead and
 * also returned through *tmp_control; otherwise *tmp_control is NULL.
 *
 * Returns 0 on success and -1 if the temporary buffer cannot be allocated.
 */
static int swrap_recvmsg_before_unix(struct msghdr *msg_in,
                                     struct msghdr *msg_tmp,
                                     uint8_t **tmp_control)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        const size_t pipe_fd_space = CMSG_SPACE(sizeof(int));
        size_t ctl_space;
        uint8_t *ctl_buf = NULL;

        *tmp_control = NULL;
        *msg_tmp = *msg_in;

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_in->msg_controllen=%zu",
                  (size_t)msg_in->msg_controllen);

        /* No control buffer from the caller: nothing to set up. */
        if (msg_in->msg_control == NULL || msg_in->msg_controllen == 0) {
                return 0;
        }

        /*
         * The kernel needs a bit more space in order to deliver the
         * pipe fd added by swrap_sendmsg_before_unix().
         * swrap_recvmsg_after_unix() will hide it again.
         */
        ctl_space = msg_in->msg_controllen;
        if (ctl_space < (INT32_MAX - pipe_fd_space)) {
                ctl_space += pipe_fd_space;
        }

        ctl_buf = calloc(1, ctl_space);
        if (ctl_buf == NULL) {
                return -1;
        }

        *tmp_control = ctl_buf;
        msg_tmp->msg_control = ctl_buf;
        msg_tmp->msg_controllen = ctl_space;

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_tmp->msg_controllen=%zu",
                  (size_t)msg_tmp->msg_controllen);
        return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        *tmp_control = NULL;
        *msg_tmp = *msg_in;
        return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6076
/*
 * Finish a receive on a unix socket and hand the result to the caller.
 *
 * msg_tmp:     the message that was passed to recvmsg()
 * tmp_control: the temporary control buffer set up by
 *              swrap_recvmsg_before_unix() (may point to NULL); it is
 *              always freed here
 * msg_out:     the caller's message; on success it receives the contents
 *              of msg_tmp with the rewritten control data copied into the
 *              caller's own control buffer
 * ret:         the result of the receive call
 *
 * Returns ret on success. If ret is negative it is returned unchanged and
 * msg_out is not touched. If processing a control message fails, the
 * negative helper result is returned with errno preserved.
 */
static ssize_t swrap_recvmsg_after_unix(struct msghdr *msg_tmp,
                                        uint8_t **tmp_control,
                                        struct msghdr *msg_out,
                                        ssize_t ret)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        struct cmsghdr *cmsg = NULL;
        uint8_t *cm_data = NULL;
        size_t cm_data_space = 0;
        int rc = -1;

        if (ret < 0) {
                int saved_errno = errno;
                SWRAP_LOG(SWRAP_LOG_TRACE, "ret=%zd - %d - %s", ret,
                          saved_errno, strerror(saved_errno));
                SAFE_FREE(*tmp_control);
                /* msg_out should not be touched on error */
                errno = saved_errno;
                return ret;
        }

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_tmp->msg_controllen=%zu",
                  (size_t)msg_tmp->msg_controllen);

        /* Nothing to do */
        if (msg_tmp->msg_controllen == 0 || msg_tmp->msg_control == NULL) {
                int saved_errno = errno;
                if (*tmp_control != NULL) {
                        /*
                         * msg_tmp->msg_control is the temporary buffer
                         * from swrap_recvmsg_before_unix() which is freed
                         * below. Give the caller its own buffer pointer
                         * back instead of a dangling one.
                         */
                        msg_tmp->msg_control = msg_out->msg_control;
                }
                *msg_out = *msg_tmp;
                SAFE_FREE(*tmp_control);
                errno = saved_errno;
                return ret;
        }

        for (cmsg = CMSG_FIRSTHDR(msg_tmp);
             cmsg != NULL;
             cmsg = CMSG_NXTHDR(msg_tmp, cmsg)) {
                switch (cmsg->cmsg_level) {
                case SOL_SOCKET:
                        rc = swrap_recvmsg_unix_sol_socket(cmsg,
                                                           &cm_data,
                                                           &cm_data_space);
                        break;

                default:
                        rc = swrap_sendmsg_copy_cmsg(cmsg,
                                                     &cm_data,
                                                     &cm_data_space);
                        break;
                }
                if (rc < 0) {
                        int saved_errno = errno;
                        SAFE_FREE(cm_data);
                        SAFE_FREE(*tmp_control);
                        errno = saved_errno;
                        return rc;
                }
        }

        /*
         * msg_tmp->msg_control (*tmp_control) was created by
         * swrap_recvmsg_before_unix() and msg_out->msg_control
         * is still the buffer of the caller.
         */
        msg_tmp->msg_control = msg_out->msg_control;
        msg_tmp->msg_controllen = msg_out->msg_controllen;
        *msg_out = *msg_tmp;

        /* Never copy more than the caller's control buffer can hold. */
        cm_data_space = MIN(cm_data_space, msg_out->msg_controllen);
        if (cm_data_space > 0) {
                /* cm_data is NULL if the loop produced no control data. */
                memcpy(msg_out->msg_control, cm_data, cm_data_space);
        }
        msg_out->msg_controllen = cm_data_space;
        SAFE_FREE(cm_data);
        SAFE_FREE(*tmp_control);

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_out->msg_controllen=%zu",
                  (size_t)msg_out->msg_controllen);
        return ret;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        int saved_errno = errno;
        *msg_out = *msg_tmp;
        SAFE_FREE(*tmp_control);
        errno = saved_errno;
        return ret;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6163
6164 static ssize_t swrap_sendmsg_before(int fd,
6165                                     struct socket_info *si,
6166                                     struct msghdr *msg,
6167                                     struct iovec *tmp_iov,
6168                                     struct sockaddr_un *tmp_un,
6169                                     const struct sockaddr_un **to_un,
6170                                     const struct sockaddr **to,
6171                                     int *bcast)
6172 {
6173         size_t i, len = 0;
6174         ssize_t ret = -1;
6175
6176         if (to_un) {
6177                 *to_un = NULL;
6178         }
6179         if (to) {
6180                 *to = NULL;
6181         }
6182         if (bcast) {
6183                 *bcast = 0;
6184         }
6185
6186         SWRAP_LOCK_SI(si);
6187
6188         switch (si->type) {
6189         case SOCK_STREAM: {
6190                 unsigned long mtu;
6191
6192                 if (!si->connected) {
6193                         errno = ENOTCONN;
6194                         goto out;
6195                 }
6196
6197                 if (msg->msg_iovlen == 0) {
6198                         break;
6199                 }
6200
6201                 mtu = socket_wrapper_mtu();
6202                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6203                         size_t nlen;
6204                         nlen = len + msg->msg_iov[i].iov_len;
6205                         if (nlen < len) {
6206                                 /* overflow */
6207                                 errno = EMSGSIZE;
6208                                 goto out;
6209                         }
6210                         if (nlen > mtu) {
6211                                 break;
6212                         }
6213                 }
6214                 msg->msg_iovlen = i;
6215                 if (msg->msg_iovlen == 0) {
6216                         *tmp_iov = msg->msg_iov[0];
6217                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6218                                                (size_t)mtu);
6219                         msg->msg_iov = tmp_iov;
6220                         msg->msg_iovlen = 1;
6221                 }
6222                 break;
6223         }
6224         case SOCK_DGRAM:
6225                 if (si->connected) {
6226                         if (msg->msg_name != NULL) {
6227                                 /*
6228                                  * We are dealing with unix sockets and if we
6229                                  * are connected, we should only talk to the
6230                                  * connected unix path. Using the fd to send
6231                                  * to another server would be hard to achieve.
6232                                  */
6233                                 msg->msg_name = NULL;
6234                                 msg->msg_namelen = 0;
6235                         }
6236                 } else {
6237                         const struct sockaddr *msg_name;
6238                         msg_name = (const struct sockaddr *)msg->msg_name;
6239
6240                         if (msg_name == NULL) {
6241                                 errno = ENOTCONN;
6242                                 goto out;
6243                         }
6244
6245
6246                         ret = sockaddr_convert_to_un(si, msg_name, msg->msg_namelen,
6247                                                      tmp_un, 0, bcast);
6248                         if (ret == -1) {
6249                                 goto out;
6250                         }
6251
6252                         if (to_un) {
6253                                 *to_un = tmp_un;
6254                         }
6255                         if (to) {
6256                                 *to = msg_name;
6257                         }
6258                         msg->msg_name = tmp_un;
6259                         msg->msg_namelen = sizeof(*tmp_un);
6260                 }
6261
6262                 if (si->bound == 0) {
6263                         ret = swrap_auto_bind(fd, si, si->family);
6264                         if (ret == -1) {
6265                                 SWRAP_UNLOCK_SI(si);
6266                                 if (errno == ENOTSOCK) {
6267                                         swrap_remove_stale(fd);
6268                                         ret = -ENOTSOCK;
6269                                 } else {
6270                                         SWRAP_LOG(SWRAP_LOG_ERROR, "swrap_sendmsg_before failed");
6271                                 }
6272                                 return ret;
6273                         }
6274                 }
6275
6276                 if (!si->defer_connect) {
6277                         break;
6278                 }
6279
6280                 ret = sockaddr_convert_to_un(si,
6281                                              &si->peername.sa.s,
6282                                              si->peername.sa_socklen,
6283                                              tmp_un,
6284                                              0,
6285                                              NULL);
6286                 if (ret == -1) {
6287                         goto out;
6288                 }
6289
6290                 ret = libc_connect(fd,
6291                                    (struct sockaddr *)(void *)tmp_un,
6292                                    sizeof(*tmp_un));
6293
6294                 /* to give better errors */
6295                 if (ret == -1 && errno == ENOENT) {
6296                         errno = EHOSTUNREACH;
6297                 }
6298
6299                 if (ret == -1) {
6300                         goto out;
6301                 }
6302
6303                 si->defer_connect = 0;
6304                 break;
6305         default:
6306                 errno = EHOSTUNREACH;
6307                 goto out;
6308         }
6309
6310         ret = 0;
6311 out:
6312         SWRAP_UNLOCK_SI(si);
6313
6314         return ret;
6315 }
6316
6317 static void swrap_sendmsg_after(int fd,
6318                                 struct socket_info *si,
6319                                 struct msghdr *msg,
6320                                 const struct sockaddr *to,
6321                                 ssize_t ret)
6322 {
6323         int saved_errno = errno;
6324         size_t i, len = 0;
6325         uint8_t *buf;
6326         off_t ofs = 0;
6327         size_t avail = 0;
6328         size_t remain;
6329
6330         /* to give better errors */
6331         if (ret == -1) {
6332                 if (saved_errno == ENOENT) {
6333                         saved_errno = EHOSTUNREACH;
6334                 } else if (saved_errno == ENOTSOCK) {
6335                         /* If the fd is not a socket, remove it */
6336                         swrap_remove_stale(fd);
6337                 }
6338         }
6339
6340         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6341                 avail += msg->msg_iov[i].iov_len;
6342         }
6343
6344         if (ret == -1) {
6345                 remain = MIN(80, avail);
6346         } else {
6347                 remain = ret;
6348         }
6349
6350         /* we capture it as one single packet */
6351         buf = (uint8_t *)malloc(remain);
6352         if (!buf) {
6353                 /* we just not capture the packet */
6354                 errno = saved_errno;
6355                 return;
6356         }
6357
6358         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6359                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6360                 if (this_time > 0) {
6361                         memcpy(buf + ofs,
6362                                msg->msg_iov[i].iov_base,
6363                                this_time);
6364                 }
6365                 ofs += this_time;
6366                 remain -= this_time;
6367         }
6368         len = ofs;
6369
6370         SWRAP_LOCK_SI(si);
6371
6372         switch (si->type) {
6373         case SOCK_STREAM:
6374                 if (ret == -1) {
6375                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6376                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND_RST, NULL, 0);
6377                 } else {
6378                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6379                 }
6380                 break;
6381
6382         case SOCK_DGRAM:
6383                 if (si->connected) {
6384                         to = &si->peername.sa.s;
6385                 }
6386                 if (ret == -1) {
6387                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6388                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO_UNREACH, buf, len);
6389                 } else {
6390                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6391                 }
6392                 break;
6393         }
6394
6395         SWRAP_UNLOCK_SI(si);
6396
6397         free(buf);
6398         errno = saved_errno;
6399 }
6400
6401 static int swrap_recvmsg_before(int fd,
6402                                 struct socket_info *si,
6403                                 struct msghdr *msg,
6404                                 struct iovec *tmp_iov)
6405 {
6406         size_t i, len = 0;
6407         int ret = -1;
6408
6409         SWRAP_LOCK_SI(si);
6410
6411         (void)fd; /* unused */
6412
6413         switch (si->type) {
6414         case SOCK_STREAM: {
6415                 unsigned int mtu;
6416                 if (!si->connected) {
6417                         errno = ENOTCONN;
6418                         goto out;
6419                 }
6420
6421                 if (msg->msg_iovlen == 0) {
6422                         break;
6423                 }
6424
6425                 mtu = socket_wrapper_mtu();
6426                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6427                         size_t nlen;
6428                         nlen = len + msg->msg_iov[i].iov_len;
6429                         if (nlen > mtu) {
6430                                 break;
6431                         }
6432                 }
6433                 msg->msg_iovlen = i;
6434                 if (msg->msg_iovlen == 0) {
6435                         *tmp_iov = msg->msg_iov[0];
6436                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6437                                                (size_t)mtu);
6438                         msg->msg_iov = tmp_iov;
6439                         msg->msg_iovlen = 1;
6440                 }
6441                 break;
6442         }
6443         case SOCK_DGRAM:
6444                 if (msg->msg_name == NULL) {
6445                         errno = EINVAL;
6446                         goto out;
6447                 }
6448
6449                 if (msg->msg_iovlen == 0) {
6450                         break;
6451                 }
6452
6453                 if (si->bound == 0) {
6454                         ret = swrap_auto_bind(fd, si, si->family);
6455                         if (ret == -1) {
6456                                 SWRAP_UNLOCK_SI(si);
6457                                 /*
6458                                  * When attempting to read or write to a
6459                                  * descriptor, if an underlying autobind fails
6460                                  * because it's not a socket, stop intercepting
6461                                  * uses of that descriptor.
6462                                  */
6463                                 if (errno == ENOTSOCK) {
6464                                         swrap_remove_stale(fd);
6465                                         ret = -ENOTSOCK;
6466                                 } else {
6467                                         SWRAP_LOG(SWRAP_LOG_ERROR,
6468                                                   "swrap_recvmsg_before failed");
6469                                 }
6470                                 return ret;
6471                         }
6472                 }
6473                 break;
6474         default:
6475                 errno = EHOSTUNREACH;
6476                 goto out;
6477         }
6478
6479         ret = 0;
6480 out:
6481         SWRAP_UNLOCK_SI(si);
6482
6483         return ret;
6484 }
6485
6486 static int swrap_recvmsg_after(int fd,
6487                                struct socket_info *si,
6488                                struct msghdr *msg,
6489                                const struct sockaddr_un *un_addr,
6490                                socklen_t un_addrlen,
6491                                ssize_t ret)
6492 {
6493         int saved_errno = errno;
6494         size_t i;
6495         uint8_t *buf = NULL;
6496         off_t ofs = 0;
6497         size_t avail = 0;
6498         size_t remain;
6499         int rc;
6500
6501         /* to give better errors */
6502         if (ret == -1) {
6503                 if (saved_errno == ENOENT) {
6504                         saved_errno = EHOSTUNREACH;
6505                 } else if (saved_errno == ENOTSOCK) {
6506                         /* If the fd is not a socket, remove it */
6507                         swrap_remove_stale(fd);
6508                 }
6509         }
6510
6511         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6512                 avail += msg->msg_iov[i].iov_len;
6513         }
6514
6515         SWRAP_LOCK_SI(si);
6516
6517         /* Convert the socket address before we leave */
6518         if (si->type == SOCK_DGRAM && un_addr != NULL) {
6519                 rc = sockaddr_convert_from_un(si,
6520                                               un_addr,
6521                                               un_addrlen,
6522                                               si->family,
6523                                               msg->msg_name,
6524                                               &msg->msg_namelen);
6525                 if (rc == -1) {
6526                         goto done;
6527                 }
6528         }
6529
6530         if (avail == 0) {
6531                 rc = 0;
6532                 goto done;
6533         }
6534
6535         if (ret == -1) {
6536                 remain = MIN(80, avail);
6537         } else {
6538                 remain = ret;
6539         }
6540
6541         /* we capture it as one single packet */
6542         buf = (uint8_t *)malloc(remain);
6543         if (buf == NULL) {
6544                 /* we just not capture the packet */
6545                 SWRAP_UNLOCK_SI(si);
6546                 errno = saved_errno;
6547                 return -1;
6548         }
6549
6550         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6551                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6552                 memcpy(buf + ofs,
6553                        msg->msg_iov[i].iov_base,
6554                        this_time);
6555                 ofs += this_time;
6556                 remain -= this_time;
6557         }
6558
6559         switch (si->type) {
6560         case SOCK_STREAM:
6561                 if (ret == -1 && saved_errno != EAGAIN && saved_errno != ENOBUFS) {
6562                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6563                 } else if (ret == 0) { /* END OF FILE */
6564                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6565                 } else if (ret > 0) {
6566                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV, buf, ret);
6567                 }
6568                 break;
6569
6570         case SOCK_DGRAM:
6571                 if (ret == -1) {
6572                         break;
6573                 }
6574
6575                 if (un_addr != NULL) {
6576                         swrap_pcap_dump_packet(si,
6577                                           msg->msg_name,
6578                                           SWRAP_RECVFROM,
6579                                           buf,
6580                                           ret);
6581                 } else {
6582                         swrap_pcap_dump_packet(si,
6583                                           msg->msg_name,
6584                                           SWRAP_RECV,
6585                                           buf,
6586                                           ret);
6587                 }
6588
6589                 break;
6590         }
6591
6592         rc = 0;
6593 done:
6594         free(buf);
6595         errno = saved_errno;
6596
6597 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6598         if (rc == 0 &&
6599             msg->msg_controllen > 0 &&
6600             msg->msg_control != NULL) {
6601                 rc = swrap_msghdr_add_socket_info(si, msg);
6602                 if (rc < 0) {
6603                         SWRAP_UNLOCK_SI(si);
6604                         return -1;
6605                 }
6606         }
6607 #endif
6608
6609         SWRAP_UNLOCK_SI(si);
6610         return rc;
6611 }
6612
6613 /****************************************************************************
6614  *   RECVFROM
6615  ***************************************************************************/
6616
6617 static ssize_t swrap_recvfrom(int s, void *buf, size_t len, int flags,
6618                               struct sockaddr *from, socklen_t *fromlen)
6619 {
6620         struct swrap_address from_addr = {
6621                 .sa_socklen = sizeof(struct sockaddr_un),
6622         };
6623         ssize_t ret;
6624         struct socket_info *si = find_socket_info(s);
6625         struct swrap_address saddr = {
6626                 .sa_socklen = sizeof(struct sockaddr_storage),
6627         };
6628         struct msghdr msg;
6629         struct iovec tmp;
6630         int tret;
6631
6632         if (!si) {
6633                 return libc_recvfrom(s,
6634                                      buf,
6635                                      len,
6636                                      flags,
6637                                      from,
6638                                      fromlen);
6639         }
6640
6641         tmp.iov_base = buf;
6642         tmp.iov_len = len;
6643
6644         ZERO_STRUCT(msg);
6645         if (from != NULL && fromlen != NULL) {
6646                 msg.msg_name = from;   /* optional address */
6647                 msg.msg_namelen = *fromlen; /* size of address */
6648         } else {
6649                 msg.msg_name = &saddr.sa.s; /* optional address */
6650                 msg.msg_namelen = saddr.sa_socklen; /* size of address */
6651         }
6652         msg.msg_iov = &tmp;            /* scatter/gather array */
6653         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6654 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6655         msg.msg_control = NULL;        /* ancillary data, see below */
6656         msg.msg_controllen = 0;        /* ancillary data buffer len */
6657         msg.msg_flags = 0;             /* flags on received message */
6658 #endif
6659
6660         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6661         if (tret < 0) {
6662                 return -1;
6663         }
6664
6665         buf = msg.msg_iov[0].iov_base;
6666         len = msg.msg_iov[0].iov_len;
6667
6668         ret = libc_recvfrom(s,
6669                             buf,
6670                             len,
6671                             flags,
6672                             &from_addr.sa.s,
6673                             &from_addr.sa_socklen);
6674         if (ret == -1) {
6675                 return ret;
6676         }
6677
6678         tret = swrap_recvmsg_after(s,
6679                                    si,
6680                                    &msg,
6681                                    &from_addr.sa.un,
6682                                    from_addr.sa_socklen,
6683                                    ret);
6684         if (tret != 0) {
6685                 return tret;
6686         }
6687
6688         if (from != NULL && fromlen != NULL) {
6689                 *fromlen = msg.msg_namelen;
6690         }
6691
6692         return ret;
6693 }
6694
/*
 * Exported recvfrom() symbol; forwards to swrap_recvfrom(). The prototype
 * depends on HAVE_ACCEPT_PSOCKLEN_T, so the length argument is cast to
 * socklen_t * before it is passed on.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
                 struct sockaddr *from, Psocklen_t fromlen)
#else
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
                 struct sockaddr *from, socklen_t *fromlen)
#endif
{
        socklen_t *addrlen = (socklen_t *)fromlen;

        return swrap_recvfrom(s, buf, len, flags, from, addrlen);
}
6705
6706 /****************************************************************************
6707  *   SENDTO
6708  ***************************************************************************/
6709
6710 static ssize_t swrap_sendto(int s, const void *buf, size_t len, int flags,
6711                             const struct sockaddr *to, socklen_t tolen)
6712 {
6713         struct msghdr msg;
6714         struct iovec tmp;
6715         struct swrap_address un_addr = {
6716                 .sa_socklen = sizeof(struct sockaddr_un),
6717         };
6718         const struct sockaddr_un *to_un = NULL;
6719         ssize_t ret;
6720         int rc;
6721         struct socket_info *si = find_socket_info(s);
6722         int bcast = 0;
6723
6724         if (!si) {
6725                 return libc_sendto(s, buf, len, flags, to, tolen);
6726         }
6727
6728         tmp.iov_base = discard_const_p(char, buf);
6729         tmp.iov_len = len;
6730
6731         ZERO_STRUCT(msg);
6732         msg.msg_name = discard_const_p(struct sockaddr, to); /* optional address */
6733         msg.msg_namelen = tolen;       /* size of address */
6734         msg.msg_iov = &tmp;            /* scatter/gather array */
6735         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6736 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6737         msg.msg_control = NULL;        /* ancillary data, see below */
6738         msg.msg_controllen = 0;        /* ancillary data buffer len */
6739         msg.msg_flags = 0;             /* flags on received message */
6740 #endif
6741
6742         rc = swrap_sendmsg_before(s,
6743                                   si,
6744                                   &msg,
6745                                   &tmp,
6746                                   &un_addr.sa.un,
6747                                   &to_un,
6748                                   &to,
6749                                   &bcast);
6750         if (rc < 0) {
6751                 return -1;
6752         }
6753
6754         buf = msg.msg_iov[0].iov_base;
6755         len = msg.msg_iov[0].iov_len;
6756
6757         if (bcast) {
6758                 struct stat st;
6759                 unsigned int iface;
6760                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
6761                 char type;
6762                 char *swrap_dir = NULL;
6763
6764                 type = SOCKET_TYPE_CHAR_UDP;
6765
6766                 swrap_dir = socket_wrapper_dir();
6767                 if (swrap_dir == NULL) {
6768                         return -1;
6769                 }
6770
6771                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
6772                         swrap_un_path(&un_addr.sa.un,
6773                                       swrap_dir,
6774                                       type,
6775                                       iface,
6776                                       prt);
6777                         if (stat(un_addr.sa.un.sun_path, &st) != 0) continue;
6778
6779                         /* ignore the any errors in broadcast sends */
6780                         libc_sendto(s,
6781                                     buf,
6782                                     len,
6783                                     flags,
6784                                     &un_addr.sa.s,
6785                                     un_addr.sa_socklen);
6786                 }
6787
6788                 SAFE_FREE(swrap_dir);
6789
6790                 SWRAP_LOCK_SI(si);
6791
6792                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6793
6794                 SWRAP_UNLOCK_SI(si);
6795
6796                 return len;
6797         }
6798
6799         SWRAP_LOCK_SI(si);
6800         /*
6801          * If it is a dgram socket and we are connected, don't include the
6802          * 'to' address.
6803          */
6804         if (si->type == SOCK_DGRAM && si->connected) {
6805                 ret = libc_sendto(s,
6806                                   buf,
6807                                   len,
6808                                   flags,
6809                                   NULL,
6810                                   0);
6811         } else {
6812                 ret = libc_sendto(s,
6813                                   buf,
6814                                   len,
6815                                   flags,
6816                                   (struct sockaddr *)msg.msg_name,
6817                                   msg.msg_namelen);
6818         }
6819
6820         SWRAP_UNLOCK_SI(si);
6821
6822         swrap_sendmsg_after(s, si, &msg, to, ret);
6823
6824         return ret;
6825 }
6826
/* Exported sendto() symbol; all work is done by swrap_sendto(). */
ssize_t sendto(int s, const void *buf, size_t len, int flags,
               const struct sockaddr *to, socklen_t tolen)
{
        ssize_t nsent = swrap_sendto(s, buf, len, flags, to, tolen);

        return nsent;
}
6832
6833 /****************************************************************************
6834  *   RECV
6835  ***************************************************************************/
6836
6837 static ssize_t swrap_recv(int s, void *buf, size_t len, int flags)
6838 {
6839         struct socket_info *si;
6840         struct msghdr msg;
6841         struct swrap_address saddr = {
6842                 .sa_socklen = sizeof(struct sockaddr_storage),
6843         };
6844         struct iovec tmp;
6845         ssize_t ret;
6846         int tret;
6847
6848         si = find_socket_info(s);
6849         if (si == NULL) {
6850                 return libc_recv(s, buf, len, flags);
6851         }
6852
6853         tmp.iov_base = buf;
6854         tmp.iov_len = len;
6855
6856         ZERO_STRUCT(msg);
6857         msg.msg_name = &saddr.sa.s;    /* optional address */
6858         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6859         msg.msg_iov = &tmp;            /* scatter/gather array */
6860         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6861 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6862         msg.msg_control = NULL;        /* ancillary data, see below */
6863         msg.msg_controllen = 0;        /* ancillary data buffer len */
6864         msg.msg_flags = 0;             /* flags on received message */
6865 #endif
6866
6867         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6868         if (tret < 0) {
6869                 return -1;
6870         }
6871
6872         buf = msg.msg_iov[0].iov_base;
6873         len = msg.msg_iov[0].iov_len;
6874
6875         ret = libc_recv(s, buf, len, flags);
6876
6877         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6878         if (tret != 0) {
6879                 return tret;
6880         }
6881
6882         return ret;
6883 }
6884
/* Exported recv() symbol; all work is done by swrap_recv(). */
ssize_t recv(int s, void *buf, size_t len, int flags)
{
        ssize_t nread = swrap_recv(s, buf, len, flags);

        return nread;
}
6889
6890 /****************************************************************************
6891  *   READ
6892  ***************************************************************************/
6893
6894 static ssize_t swrap_read(int s, void *buf, size_t len)
6895 {
6896         struct socket_info *si;
6897         struct msghdr msg;
6898         struct iovec tmp;
6899         struct swrap_address saddr = {
6900                 .sa_socklen = sizeof(struct sockaddr_storage),
6901         };
6902         ssize_t ret;
6903         int tret;
6904
6905         si = find_socket_info(s);
6906         if (si == NULL) {
6907                 return libc_read(s, buf, len);
6908         }
6909
6910         tmp.iov_base = buf;
6911         tmp.iov_len = len;
6912
6913         ZERO_STRUCT(msg);
6914         msg.msg_name = &saddr.sa.ss;   /* optional address */
6915         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6916         msg.msg_iov = &tmp;            /* scatter/gather array */
6917         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6918 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6919         msg.msg_control = NULL;        /* ancillary data, see below */
6920         msg.msg_controllen = 0;        /* ancillary data buffer len */
6921         msg.msg_flags = 0;             /* flags on received message */
6922 #endif
6923
6924         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6925         if (tret < 0) {
6926                 if (tret == -ENOTSOCK) {
6927                         return libc_read(s, buf, len);
6928                 }
6929                 return -1;
6930         }
6931
6932         buf = msg.msg_iov[0].iov_base;
6933         len = msg.msg_iov[0].iov_len;
6934
6935         ret = libc_read(s, buf, len);
6936
6937         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6938         if (tret != 0) {
6939                 return tret;
6940         }
6941
6942         return ret;
6943 }
6944
/* Exported read() symbol; all work is done by swrap_read(). */
ssize_t read(int s, void *buf, size_t len)
{
        ssize_t nread = swrap_read(s, buf, len);

        return nread;
}
6949
6950 /****************************************************************************
6951  *   WRITE
6952  ***************************************************************************/
6953
6954 static ssize_t swrap_write(int s, const void *buf, size_t len)
6955 {
6956         struct msghdr msg;
6957         struct iovec tmp;
6958         struct sockaddr_un un_addr;
6959         ssize_t ret;
6960         int rc;
6961         struct socket_info *si;
6962
6963         si = find_socket_info(s);
6964         if (si == NULL) {
6965                 return libc_write(s, buf, len);
6966         }
6967
6968         tmp.iov_base = discard_const_p(char, buf);
6969         tmp.iov_len = len;
6970
6971         ZERO_STRUCT(msg);
6972         msg.msg_name = NULL;           /* optional address */
6973         msg.msg_namelen = 0;           /* size of address */
6974         msg.msg_iov = &tmp;            /* scatter/gather array */
6975         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6976 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6977         msg.msg_control = NULL;        /* ancillary data, see below */
6978         msg.msg_controllen = 0;        /* ancillary data buffer len */
6979         msg.msg_flags = 0;             /* flags on received message */
6980 #endif
6981
6982         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
6983         if (rc < 0) {
6984                 return -1;
6985         }
6986
6987         buf = msg.msg_iov[0].iov_base;
6988         len = msg.msg_iov[0].iov_len;
6989
6990         ret = libc_write(s, buf, len);
6991
6992         swrap_sendmsg_after(s, si, &msg, NULL, ret);
6993
6994         return ret;
6995 }
6996
/* Exported write() symbol; all work is done by swrap_write(). */
ssize_t write(int s, const void *buf, size_t len)
{
        ssize_t nwritten = swrap_write(s, buf, len);

        return nwritten;
}
7001
7002 /****************************************************************************
7003  *   SEND
7004  ***************************************************************************/
7005
7006 static ssize_t swrap_send(int s, const void *buf, size_t len, int flags)
7007 {
7008         struct msghdr msg;
7009         struct iovec tmp;
7010         struct sockaddr_un un_addr;
7011         ssize_t ret;
7012         int rc;
7013         struct socket_info *si = find_socket_info(s);
7014
7015         if (!si) {
7016                 return libc_send(s, buf, len, flags);
7017         }
7018
7019         tmp.iov_base = discard_const_p(char, buf);
7020         tmp.iov_len = len;
7021
7022         ZERO_STRUCT(msg);
7023         msg.msg_name = NULL;           /* optional address */
7024         msg.msg_namelen = 0;           /* size of address */
7025         msg.msg_iov = &tmp;            /* scatter/gather array */
7026         msg.msg_iovlen = 1;            /* # elements in msg_iov */
7027 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7028         msg.msg_control = NULL;        /* ancillary data, see below */
7029         msg.msg_controllen = 0;        /* ancillary data buffer len */
7030         msg.msg_flags = 0;             /* flags on received message */
7031 #endif
7032
7033         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
7034         if (rc < 0) {
7035                 return -1;
7036         }
7037
7038         buf = msg.msg_iov[0].iov_base;
7039         len = msg.msg_iov[0].iov_len;
7040
7041         ret = libc_send(s, buf, len, flags);
7042
7043         swrap_sendmsg_after(s, si, &msg, NULL, ret);
7044
7045         return ret;
7046 }
7047
/* Exported send() symbol; all work is done by swrap_send(). */
ssize_t send(int s, const void *buf, size_t len, int flags)
{
        ssize_t nsent = swrap_send(s, buf, len, flags);

        return nsent;
}
7052
7053 /****************************************************************************
7054  *   RECVMSG
7055  ***************************************************************************/
7056
7057 static ssize_t swrap_recvmsg(int s, struct msghdr *omsg, int flags)
7058 {
7059         struct swrap_address from_addr = {
7060                 .sa_socklen = sizeof(struct sockaddr_un),
7061         };
7062         struct swrap_address convert_addr = {
7063                 .sa_socklen = sizeof(struct sockaddr_storage),
7064         };
7065         struct socket_info *si;
7066         struct msghdr msg;
7067         struct iovec tmp;
7068 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7069         size_t msg_ctrllen_filled;
7070         size_t msg_ctrllen_left;
7071 #endif
7072
7073         ssize_t ret;
7074         int rc;
7075
7076         si = find_socket_info(s);
7077         if (si == NULL) {
7078                 uint8_t *tmp_control = NULL;
7079                 rc = swrap_recvmsg_before_unix(omsg, &msg, &tmp_control);
7080                 if (rc < 0) {
7081                         return rc;
7082                 }
7083                 ret = libc_recvmsg(s, &msg, flags);
7084                 return swrap_recvmsg_after_unix(&msg, &tmp_control, omsg, ret);
7085         }
7086
7087         tmp.iov_base = NULL;
7088         tmp.iov_len = 0;
7089
7090         ZERO_STRUCT(msg);
7091         msg.msg_name = &from_addr.sa;              /* optional address */
7092         msg.msg_namelen = from_addr.sa_socklen;    /* size of address */
7093         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7094         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7095 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7096         msg_ctrllen_filled = 0;
7097         msg_ctrllen_left = omsg->msg_controllen;
7098
7099         msg.msg_control = omsg->msg_control;       /* ancillary data, see below */
7100         msg.msg_controllen = omsg->msg_controllen; /* ancillary data buffer len */
7101         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7102 #endif
7103
7104         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7105         if (rc < 0) {
7106                 return -1;
7107         }
7108
7109         ret = libc_recvmsg(s, &msg, flags);
7110
7111 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7112         msg_ctrllen_filled += msg.msg_controllen;
7113         msg_ctrllen_left -= msg.msg_controllen;
7114
7115         if (omsg->msg_control != NULL) {
7116                 uint8_t *p;
7117
7118                 p = omsg->msg_control;
7119                 p += msg_ctrllen_filled;
7120
7121                 msg.msg_control = p;
7122                 msg.msg_controllen = msg_ctrllen_left;
7123         } else {
7124                 msg.msg_control = NULL;
7125                 msg.msg_controllen = 0;
7126         }
7127 #endif
7128
7129         /*
7130          * We convert the unix address to a IP address so we need a buffer
7131          * which can store the address in case of SOCK_DGRAM, see below.
7132          */
7133         msg.msg_name = &convert_addr.sa;
7134         msg.msg_namelen = convert_addr.sa_socklen;
7135
7136         rc = swrap_recvmsg_after(s,
7137                                  si,
7138                                  &msg,
7139                                  &from_addr.sa.un,
7140                                  from_addr.sa_socklen,
7141                                  ret);
7142         if (rc != 0) {
7143                 return rc;
7144         }
7145
7146 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7147         if (omsg->msg_control != NULL) {
7148                 /* msg.msg_controllen = space left */
7149                 msg_ctrllen_left = msg.msg_controllen;
7150                 msg_ctrllen_filled = omsg->msg_controllen - msg_ctrllen_left;
7151         }
7152
7153         /* Update the original message length */
7154         omsg->msg_controllen = msg_ctrllen_filled;
7155         omsg->msg_flags = msg.msg_flags;
7156 #endif
7157         omsg->msg_iovlen = msg.msg_iovlen;
7158
7159         SWRAP_LOCK_SI(si);
7160
7161         /*
7162          * From the manpage:
7163          *
7164          * The  msg_name  field  points  to a caller-allocated buffer that is
7165          * used to return the source address if the socket is unconnected.  The
7166          * caller should set msg_namelen to the size of this buffer before this
7167          * call; upon return from a successful call, msg_name will contain the
7168          * length of the returned address.  If the application  does  not  need
7169          * to know the source address, msg_name can be specified as NULL.
7170          */
7171         if (si->type == SOCK_STREAM) {
7172                 omsg->msg_namelen = 0;
7173         } else if (omsg->msg_name != NULL &&
7174                    omsg->msg_namelen != 0 &&
7175                    omsg->msg_namelen >= msg.msg_namelen) {
7176                 memcpy(omsg->msg_name, msg.msg_name, msg.msg_namelen);
7177                 omsg->msg_namelen = msg.msg_namelen;
7178         }
7179
7180         SWRAP_UNLOCK_SI(si);
7181
7182         return ret;
7183 }
7184
/*
 * Exported recvmsg() symbol; the whole call is handed over to
 * swrap_recvmsg() and its result is returned unchanged.
 */
ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)
{
        ssize_t nread = swrap_recvmsg(sockfd, msg, flags);

        return nread;
}
7189
7190 /****************************************************************************
7191  *   SENDMSG
7192  ***************************************************************************/
7193
7194 static ssize_t swrap_sendmsg(int s, const struct msghdr *omsg, int flags)
7195 {
7196         struct msghdr msg;
7197         struct iovec tmp;
7198         struct sockaddr_un un_addr;
7199         const struct sockaddr_un *to_un = NULL;
7200         const struct sockaddr *to = NULL;
7201         ssize_t ret;
7202         int rc;
7203         struct socket_info *si = find_socket_info(s);
7204         int bcast = 0;
7205
7206         if (!si) {
7207                 int scm_rights_pipe_fd = -1;
7208
7209                 rc = swrap_sendmsg_before_unix(omsg, &msg,
7210                                                &scm_rights_pipe_fd);
7211                 if (rc < 0) {
7212                         return rc;
7213                 }
7214                 ret = libc_sendmsg(s, &msg, flags);
7215                 return swrap_sendmsg_after_unix(&msg, ret, scm_rights_pipe_fd);
7216         }
7217
7218         ZERO_STRUCT(un_addr);
7219
7220         tmp.iov_base = NULL;
7221         tmp.iov_len = 0;
7222
7223         ZERO_STRUCT(msg);
7224
7225         SWRAP_LOCK_SI(si);
7226
7227         if (si->connected == 0) {
7228                 msg.msg_name = omsg->msg_name;             /* optional address */
7229                 msg.msg_namelen = omsg->msg_namelen;       /* size of address */
7230         }
7231         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7232         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7233
7234         SWRAP_UNLOCK_SI(si);
7235
7236 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7237         if (omsg != NULL && omsg->msg_controllen > 0 && omsg->msg_control != NULL) {
7238                 uint8_t *cmbuf = NULL;
7239                 size_t cmlen = 0;
7240
7241                 rc = swrap_sendmsg_filter_cmsghdr(omsg, &cmbuf, &cmlen);
7242                 if (rc < 0) {
7243                         return rc;
7244                 }
7245
7246                 if (cmlen == 0) {
7247                         msg.msg_controllen = 0;
7248                         msg.msg_control = NULL;
7249                 } else {
7250                         msg.msg_control = cmbuf;
7251                         msg.msg_controllen = cmlen;
7252                 }
7253         }
7254         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7255 #endif
7256         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, &to_un, &to, &bcast);
7257         if (rc < 0) {
7258                 int saved_errno = errno;
7259 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7260                 SAFE_FREE(msg.msg_control);
7261 #endif
7262                 errno = saved_errno;
7263                 return -1;
7264         }
7265
7266         if (bcast) {
7267                 struct stat st;
7268                 unsigned int iface;
7269                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
7270                 char type;
7271                 size_t i, len = 0;
7272                 uint8_t *buf;
7273                 off_t ofs = 0;
7274                 size_t avail = 0;
7275                 size_t remain;
7276                 char *swrap_dir = NULL;
7277
7278                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7279                         avail += msg.msg_iov[i].iov_len;
7280                 }
7281
7282                 len = avail;
7283                 remain = avail;
7284
7285                 /* we capture it as one single packet */
7286                 buf = (uint8_t *)malloc(remain);
7287                 if (!buf) {
7288                         int saved_errno = errno;
7289 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7290                         SAFE_FREE(msg.msg_control);
7291 #endif
7292                         errno = saved_errno;
7293                         return -1;
7294                 }
7295
7296                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7297                         size_t this_time = MIN(remain, (size_t)msg.msg_iov[i].iov_len);
7298                         memcpy(buf + ofs,
7299                                msg.msg_iov[i].iov_base,
7300                                this_time);
7301                         ofs += this_time;
7302                         remain -= this_time;
7303                 }
7304
7305                 type = SOCKET_TYPE_CHAR_UDP;
7306
7307                 swrap_dir = socket_wrapper_dir();
7308                 if (swrap_dir == NULL) {
7309                         int saved_errno = errno;
7310 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7311                         SAFE_FREE(msg.msg_control);
7312 #endif
7313                         SAFE_FREE(buf);
7314                         errno = saved_errno;
7315                         return -1;
7316                 }
7317
7318                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
7319                         swrap_un_path(&un_addr, swrap_dir, type, iface, prt);
7320                         if (stat(un_addr.sun_path, &st) != 0) continue;
7321
7322                         msg.msg_name = &un_addr;           /* optional address */
7323                         msg.msg_namelen = sizeof(un_addr); /* size of address */
7324
7325                         /* ignore the any errors in broadcast sends */
7326                         libc_sendmsg(s, &msg, flags);
7327                 }
7328
7329                 SAFE_FREE(swrap_dir);
7330
7331                 SWRAP_LOCK_SI(si);
7332
7333                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
7334                 free(buf);
7335
7336                 SWRAP_UNLOCK_SI(si);
7337
7338                 return len;
7339         }
7340
7341         ret = libc_sendmsg(s, &msg, flags);
7342
7343         swrap_sendmsg_after(s, si, &msg, to, ret);
7344
7345 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7346         {
7347                 int saved_errno = errno;
7348                 SAFE_FREE(msg.msg_control);
7349                 errno = saved_errno;
7350         }
7351 #endif
7352
7353         return ret;
7354 }
7355
/*
 * Exported sendmsg() symbol; the whole call is handed over to
 * swrap_sendmsg() and its result is returned unchanged.
 */
ssize_t sendmsg(int s, const struct msghdr *omsg, int flags)
{
        ssize_t nsent = swrap_sendmsg(s, omsg, flags);

        return nsent;
}
7360
7361 /****************************************************************************
7362  *   READV
7363  ***************************************************************************/
7364
7365 static ssize_t swrap_readv(int s, const struct iovec *vector, int count)
7366 {
7367         struct socket_info *si;
7368         struct msghdr msg;
7369         struct iovec tmp;
7370         struct swrap_address saddr = {
7371                 .sa_socklen = sizeof(struct sockaddr_storage)
7372         };
7373         ssize_t ret;
7374         int rc;
7375
7376         si = find_socket_info(s);
7377         if (si == NULL) {
7378                 return libc_readv(s, vector, count);
7379         }
7380
7381         tmp.iov_base = NULL;
7382         tmp.iov_len = 0;
7383
7384         ZERO_STRUCT(msg);
7385         msg.msg_name = &saddr.sa.s; /* optional address */
7386         msg.msg_namelen = saddr.sa_socklen;      /* size of address */
7387         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7388         msg.msg_iovlen = count;        /* # elements in msg_iov */
7389 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7390         msg.msg_control = NULL;        /* ancillary data, see below */
7391         msg.msg_controllen = 0;        /* ancillary data buffer len */
7392         msg.msg_flags = 0;             /* flags on received message */
7393 #endif
7394
7395         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7396         if (rc < 0) {
7397                 if (rc == -ENOTSOCK) {
7398                         return libc_readv(s, vector, count);
7399                 }
7400                 return -1;
7401         }
7402
7403         ret = libc_readv(s, msg.msg_iov, msg.msg_iovlen);
7404
7405         rc = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
7406         if (rc != 0) {
7407                 return rc;
7408         }
7409
7410         return ret;
7411 }
7412
/*
 * Exported readv() symbol; the whole call is handed over to swrap_readv()
 * and its result is returned unchanged.
 */
ssize_t readv(int s, const struct iovec *vector, int count)
{
        ssize_t nread = swrap_readv(s, vector, count);

        return nread;
}
7417
7418 /****************************************************************************
7419  *   WRITEV
7420  ***************************************************************************/
7421
7422 static ssize_t swrap_writev(int s, const struct iovec *vector, int count)
7423 {
7424         struct msghdr msg;
7425         struct iovec tmp;
7426         struct sockaddr_un un_addr;
7427         ssize_t ret;
7428         int rc;
7429         struct socket_info *si = find_socket_info(s);
7430
7431         if (!si) {
7432                 return libc_writev(s, vector, count);
7433         }
7434
7435         tmp.iov_base = NULL;
7436         tmp.iov_len = 0;
7437
7438         ZERO_STRUCT(msg);
7439         msg.msg_name = NULL;           /* optional address */
7440         msg.msg_namelen = 0;           /* size of address */
7441         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7442         msg.msg_iovlen = count;        /* # elements in msg_iov */
7443 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7444         msg.msg_control = NULL;        /* ancillary data, see below */
7445         msg.msg_controllen = 0;        /* ancillary data buffer len */
7446         msg.msg_flags = 0;             /* flags on received message */
7447 #endif
7448
7449         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
7450         if (rc < 0) {
7451                 if (rc == -ENOTSOCK) {
7452                         return libc_readv(s, vector, count);
7453                 }
7454                 return -1;
7455         }
7456
7457         ret = libc_writev(s, msg.msg_iov, msg.msg_iovlen);
7458
7459         swrap_sendmsg_after(s, si, &msg, NULL, ret);
7460
7461         return ret;
7462 }
7463
/*
 * Exported writev() symbol; the whole call is handed over to swrap_writev()
 * and its result is returned unchanged.
 */
ssize_t writev(int s, const struct iovec *vector, int count)
{
        ssize_t nwritten = swrap_writev(s, vector, count);

        return nwritten;
}
7468
7469 /****************************
7470  * CLOSE
7471  ***************************/
7472
7473 static int swrap_remove_wrapper(const char *__func_name,
7474                                 int (*__close_fd_fn)(int fd),
7475                                 int fd)
7476 {
7477         struct socket_info *si = NULL;
7478         int si_index;
7479         int ret_errno = errno;
7480         int ret;
7481
7482         swrap_mutex_lock(&socket_reset_mutex);
7483
7484         si_index = find_socket_info_index(fd);
7485         if (si_index == -1) {
7486                 swrap_mutex_unlock(&socket_reset_mutex);
7487                 return __close_fd_fn(fd);
7488         }
7489
7490         swrap_log(SWRAP_LOG_TRACE, __func_name, "Remove wrapper for fd=%d", fd);
7491         reset_socket_info_index(fd);
7492
7493         si = swrap_get_socket_info(si_index);
7494
7495         swrap_mutex_lock(&first_free_mutex);
7496         SWRAP_LOCK_SI(si);
7497
7498         ret = __close_fd_fn(fd);
7499         if (ret == -1) {
7500                 ret_errno = errno;
7501         }
7502
7503         swrap_dec_refcount(si);
7504
7505         if (swrap_get_refcount(si) > 0) {
7506                 /* there are still references left */
7507                 goto out;
7508         }
7509
7510         if (si->fd_passed) {
7511                 goto set_next_free;
7512         }
7513
7514         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
7515                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_SEND, NULL, 0);
7516         }
7517
7518         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
7519                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_RECV, NULL, 0);
7520                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_ACK, NULL, 0);
7521         }
7522
7523         if (si->un_addr.sun_path[0] != '\0') {
7524                 unlink(si->un_addr.sun_path);
7525         }
7526
7527 set_next_free:
7528         swrap_set_next_free(si, first_free);
7529         first_free = si_index;
7530
7531 out:
7532         SWRAP_UNLOCK_SI(si);
7533         swrap_mutex_unlock(&first_free_mutex);
7534         swrap_mutex_unlock(&socket_reset_mutex);
7535
7536         errno = ret_errno;
7537         return ret;
7538 }
7539
/*
 * Close callback that leaves the file descriptor untouched and always
 * reports success.
 */
static int swrap_noop_close(int fd)
{
        const int success = 0;

        (void)fd; /* unused */

        return success;
}
7545
7546 static void swrap_remove_stale(int fd)
7547 {
7548         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
7549 }
7550
7551 /*
7552  * This allows socket_wrapper aware applications to
7553  * indicate that the given fd does not belong to
7554  * an inet socket.
7555  *
7556  * We already overload a lot of unrelated functions
7557  * like eventfd(), timerfd_create(), ... in order to
7558  * call swrap_remove_stale() on the returned fd, but
7559  * we'll never be able to handle all possible syscalls.
7560  *
7561  * socket_wrapper_indicate_no_inet_fd() gives them a way
7562  * to do the same.
7563  *
7564  * We don't export swrap_remove_stale() in order to
7565  * make it easier to analyze SOCKET_WRAPPER_DEBUGLEVEL=3
7566  * log files.
7567  */
7568 void socket_wrapper_indicate_no_inet_fd(int fd)
7569 {
7570         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
7571 }
7572
7573 static int swrap_close(int fd)
7574 {
7575         return swrap_remove_wrapper(__func__, libc_close, fd);
7576 }
7577
/*
 * Exported close() symbol; the call is handed over to swrap_close().
 */
int close(int fd)
{
        int rc = swrap_close(fd);

        return rc;
}
7582
7583 #ifdef HAVE___CLOSE_NOCANCEL
7584
7585 static int swrap___close_nocancel(int fd)
7586 {
7587         return swrap_remove_wrapper(__func__, libc___close_nocancel, fd);
7588 }
7589
/*
 * Exported __close_nocancel() symbol. The prototype is declared right
 * before the definition; the call is handed over to
 * swrap___close_nocancel().
 */
int __close_nocancel(int fd);
int __close_nocancel(int fd)
{
        int rc = swrap___close_nocancel(fd);

        return rc;
}
7595
7596 #endif /* HAVE___CLOSE_NOCANCEL */
7597
7598 /****************************
7599  * DUP
7600  ***************************/
7601
7602 static int swrap_dup(int fd)
7603 {
7604         struct socket_info *si;
7605         int dup_fd, idx;
7606
7607         idx = find_socket_info_index(fd);
7608         if (idx == -1) {
7609                 return libc_dup(fd);
7610         }
7611
7612         si = swrap_get_socket_info(idx);
7613
7614         dup_fd = libc_dup(fd);
7615         if (dup_fd == -1) {
7616                 int saved_errno = errno;
7617                 errno = saved_errno;
7618                 return -1;
7619         }
7620
7621         if ((size_t)dup_fd >= socket_fds_max) {
7622                 SWRAP_LOG(SWRAP_LOG_ERROR,
7623                           "The max socket index limit of %zu has been reached, "
7624                           "trying to add %d",
7625                           socket_fds_max,
7626                           dup_fd);
7627                 libc_close(dup_fd);
7628                 errno = EMFILE;
7629                 return -1;
7630         }
7631
7632         SWRAP_LOCK_SI(si);
7633
7634         swrap_inc_refcount(si);
7635
7636         SWRAP_UNLOCK_SI(si);
7637
7638         /* Make sure we don't have an entry for the fd */
7639         swrap_remove_stale(dup_fd);
7640
7641         set_socket_info_index(dup_fd, idx);
7642
7643         return dup_fd;
7644 }
7645
/*
 * Exported dup() symbol; the call is handed over to swrap_dup().
 */
int dup(int fd)
{
        int new_fd = swrap_dup(fd);

        return new_fd;
}
7650
7651 /****************************
7652  * DUP2
7653  ***************************/
7654
7655 static int swrap_dup2(int fd, int newfd)
7656 {
7657         struct socket_info *si;
7658         int dup_fd, idx;
7659
7660         idx = find_socket_info_index(fd);
7661         if (idx == -1) {
7662                 return libc_dup2(fd, newfd);
7663         }
7664
7665         si = swrap_get_socket_info(idx);
7666
7667         if (fd == newfd) {
7668                 /*
7669                  * According to the manpage:
7670                  *
7671                  * "If oldfd is a valid file descriptor, and newfd has the same
7672                  * value as oldfd, then dup2() does nothing, and returns newfd."
7673                  */
7674                 return newfd;
7675         }
7676
7677         if ((size_t)newfd >= socket_fds_max) {
7678                 SWRAP_LOG(SWRAP_LOG_ERROR,
7679                           "The max socket index limit of %zu has been reached, "
7680                           "trying to add %d",
7681                           socket_fds_max,
7682                           newfd);
7683                 errno = EMFILE;
7684                 return -1;
7685         }
7686
7687         if (find_socket_info(newfd)) {
7688                 /* dup2() does an implicit close of newfd, which we
7689                  * need to emulate */
7690                 swrap_close(newfd);
7691         }
7692
7693         dup_fd = libc_dup2(fd, newfd);
7694         if (dup_fd == -1) {
7695                 int saved_errno = errno;
7696                 errno = saved_errno;
7697                 return -1;
7698         }
7699
7700         SWRAP_LOCK_SI(si);
7701
7702         swrap_inc_refcount(si);
7703
7704         SWRAP_UNLOCK_SI(si);
7705
7706         /* Make sure we don't have an entry for the fd */
7707         swrap_remove_stale(dup_fd);
7708
7709         set_socket_info_index(dup_fd, idx);
7710
7711         return dup_fd;
7712 }
7713
/*
 * Exported dup2() symbol; the call is handed over to swrap_dup2().
 */
int dup2(int fd, int newfd)
{
        int result_fd = swrap_dup2(fd, newfd);

        return result_fd;
}
7718
7719 /****************************
7720  * FCNTL
7721  ***************************/
7722
7723 static int swrap_vfcntl(int fd, int cmd, va_list va)
7724 {
7725         struct socket_info *si;
7726         int rc, dup_fd, idx;
7727
7728         idx = find_socket_info_index(fd);
7729         if (idx == -1) {
7730                 return libc_vfcntl(fd, cmd, va);
7731         }
7732
7733         si = swrap_get_socket_info(idx);
7734
7735         switch (cmd) {
7736         case F_DUPFD:
7737                 dup_fd = libc_vfcntl(fd, cmd, va);
7738                 if (dup_fd == -1) {
7739                         int saved_errno = errno;
7740                         errno = saved_errno;
7741                         return -1;
7742                 }
7743
7744                 /* Make sure we don't have an entry for the fd */
7745                 swrap_remove_stale(dup_fd);
7746
7747                 if ((size_t)dup_fd >= socket_fds_max) {
7748                         SWRAP_LOG(SWRAP_LOG_ERROR,
7749                           "The max socket index limit of %zu has been reached, "
7750                           "trying to add %d",
7751                           socket_fds_max,
7752                           dup_fd);
7753                         libc_close(dup_fd);
7754                         errno = EMFILE;
7755                         return -1;
7756                 }
7757
7758                 SWRAP_LOCK_SI(si);
7759
7760                 swrap_inc_refcount(si);
7761
7762                 SWRAP_UNLOCK_SI(si);
7763
7764
7765                 set_socket_info_index(dup_fd, idx);
7766
7767                 rc = dup_fd;
7768                 break;
7769         default:
7770                 rc = libc_vfcntl(fd, cmd, va);
7771                 break;
7772         }
7773
7774         return rc;
7775 }
7776
/*
 * Exported fcntl() symbol: collects the variable arguments into a va_list
 * and lets swrap_vfcntl() do the work.
 */
int fcntl(int fd, int cmd, ...)
{
        va_list args;
        int result;

        va_start(args, cmd);
        result = swrap_vfcntl(fd, cmd, args);
        va_end(args);

        return result;
}
7790
7791 /****************************
7792  * EVENTFD
7793  ***************************/
7794
7795 #ifdef HAVE_EVENTFD
/*
 * eventfd() backend: creates the descriptor through libc_eventfd() and, on
 * success, drops any stale socket_info entry registered for that fd number.
 */
static int swrap_eventfd(int count, int flags)
{
        int new_fd = libc_eventfd(count, flags);

        if (new_fd == -1) {
                return new_fd;
        }

        swrap_remove_stale(new_fd);

        return new_fd;
}
7807
/*
 * Exported eventfd() symbol. The type of the first parameter depends on
 * HAVE_EVENTFD_UNSIGNED_INT; the call is handed over to swrap_eventfd().
 */
#ifdef HAVE_EVENTFD_UNSIGNED_INT
int eventfd(unsigned int count, int flags)
#else
int eventfd(int count, int flags)
#endif
{
        int new_fd = swrap_eventfd(count, flags);

        return new_fd;
}
7816 #endif
7817
7818 #ifdef HAVE_PLEDGE
/*
 * pledge() replacement that ignores both arguments and always reports
 * success.
 */
int pledge(const char *promises, const char *paths[])
{
        /* neither argument is looked at */
        (void)paths;
        (void)promises;

        return 0;
}
7826 #endif /* HAVE_PLEDGE */
7827
7828 static void swrap_thread_prepare(void)
7829 {
7830         /*
7831          * This function should only be called here!!
7832          *
7833          * We bind all symobls to avoid deadlocks of the fork is
7834          * interrupted by a signal handler using a symbol of this
7835          * library.
7836          */
7837         swrap_bind_symbol_all();
7838
7839         SWRAP_LOCK_ALL;
7840 }
7841
7842 static void swrap_thread_parent(void)
7843 {
7844         SWRAP_UNLOCK_ALL;
7845 }
7846
7847 static void swrap_thread_child(void)
7848 {
7849         SWRAP_REINIT_ALL;
7850 }
7851
7852 /****************************
7853  * CONSTRUCTOR
7854  ***************************/
7855 void swrap_constructor(void)
7856 {
7857         if (PIPE_BUF < sizeof(struct swrap_unix_scm_rights)) {
7858                 SWRAP_LOG(SWRAP_LOG_ERROR,
7859                           "PIPE_BUF=%zu < "
7860                           "sizeof(struct swrap_unix_scm_rights)=%zu\n"
7861                           "sizeof(struct swrap_unix_scm_rights_payload)=%zu "
7862                           "sizeof(struct socket_info)=%zu",
7863                           (size_t)PIPE_BUF,
7864                           sizeof(struct swrap_unix_scm_rights),
7865                           sizeof(struct swrap_unix_scm_rights_payload),
7866                           sizeof(struct socket_info));
7867                 exit(-1);
7868         }
7869
7870         SWRAP_REINIT_ALL;
7871
7872         /*
7873         * If we hold a lock and the application forks, then the child
7874         * is not able to unlock the mutex and we are in a deadlock.
7875         * This should prevent such deadlocks.
7876         */
7877         pthread_atfork(&swrap_thread_prepare,
7878                        &swrap_thread_parent,
7879                        &swrap_thread_child);
7880 }
7881
7882 /****************************
7883  * DESTRUCTOR
7884  ***************************/
7885
7886 /*
7887  * This function is called when the library is unloaded and makes sure that
7888  * sockets get closed and the unix file for the socket are unlinked.
7889  */
7890 void swrap_destructor(void)
7891 {
7892         size_t i;
7893
7894         if (socket_fds_idx != NULL) {
7895                 for (i = 0; i < socket_fds_max; ++i) {
7896                         if (socket_fds_idx[i] != -1) {
7897                                 swrap_close(i);
7898                         }
7899                 }
7900                 SAFE_FREE(socket_fds_idx);
7901         }
7902
7903         SAFE_FREE(sockets);
7904
7905         if (swrap.libc.handle != NULL
7906 #ifdef RTLD_NEXT
7907             && swrap.libc.handle != RTLD_NEXT
7908 #endif
7909                         ) {
7910                 dlclose(swrap.libc.handle);
7911         }
7912         if (swrap.libc.socket_handle
7913 #ifdef RTLD_NEXT
7914             && swrap.libc.socket_handle != RTLD_NEXT
7915 #endif
7916                         ) {
7917                 dlclose(swrap.libc.socket_handle);
7918         }
7919 }
7920
#if defined(HAVE__SOCKET) && defined(HAVE__CLOSE)
/*
 * On FreeBSD 12 (and maybe other platforms)
 * system libraries like libresolv prefix their
 * syscalls with '_' in order to always use
 * the symbols from libc.
 *
 * In the interaction with resolv_wrapper,
 * we need to inject socket wrapper into libresolv,
 * which means we need to provide all socket
 * related syscalls also with the '_' prefix.
 *
 * This is tested in Samba's 'make test',
 * there we noticed that providing '_read',
 * '_open' and '_close' would cause errors, which
 * means we skip '_read', '_write' and
 * all non socket related calls without
 * further analyzing the problem.
 */

/*
 * Declare __aliassym as an alias of the already defined function __sym,
 * with the same type. __typeof__ is used instead of typeof because
 * the latter is not a keyword in strict ISO C modes before C23.
 */
#define SWRAP_SYMBOL_ALIAS(__sym, __aliassym) \
        extern __typeof__(__sym) __aliassym __attribute__ ((alias(#__sym)))

#ifdef HAVE_ACCEPT4
SWRAP_SYMBOL_ALIAS(accept4, _accept4);
#endif
SWRAP_SYMBOL_ALIAS(accept, _accept);
SWRAP_SYMBOL_ALIAS(bind, _bind);
SWRAP_SYMBOL_ALIAS(connect, _connect);
SWRAP_SYMBOL_ALIAS(dup, _dup);
SWRAP_SYMBOL_ALIAS(dup2, _dup2);
SWRAP_SYMBOL_ALIAS(fcntl, _fcntl);
SWRAP_SYMBOL_ALIAS(getpeername, _getpeername);
SWRAP_SYMBOL_ALIAS(getsockname, _getsockname);
SWRAP_SYMBOL_ALIAS(getsockopt, _getsockopt);
SWRAP_SYMBOL_ALIAS(ioctl, _ioctl);
SWRAP_SYMBOL_ALIAS(listen, _listen);
SWRAP_SYMBOL_ALIAS(readv, _readv);
SWRAP_SYMBOL_ALIAS(recv, _recv);
SWRAP_SYMBOL_ALIAS(recvfrom, _recvfrom);
SWRAP_SYMBOL_ALIAS(recvmsg, _recvmsg);
SWRAP_SYMBOL_ALIAS(send, _send);
SWRAP_SYMBOL_ALIAS(sendmsg, _sendmsg);
SWRAP_SYMBOL_ALIAS(sendto, _sendto);
SWRAP_SYMBOL_ALIAS(setsockopt, _setsockopt);
SWRAP_SYMBOL_ALIAS(socket, _socket);
SWRAP_SYMBOL_ALIAS(socketpair, _socketpair);
SWRAP_SYMBOL_ALIAS(writev, _writev);

#endif /* HAVE__SOCKET && HAVE__CLOSE */