aefd526ddfe633f4e158138534a0d0b8d6ea3095
[socket_wrapper.git] / src / socket_wrapper.c
1 /*
2  * BSD 3-Clause License
3  *
4  * Copyright (c) 2005-2008, Jelmer Vernooij <jelmer@samba.org>
5  * Copyright (c) 2006-2021, Stefan Metzmacher <metze@samba.org>
6  * Copyright (c) 2013-2021, Andreas Schneider <asn@samba.org>
7  * Copyright (c) 2014-2017, Michael Adam <obnox@samba.org>
8  * Copyright (c) 2016-2018, Anoop C S <anoopcs@redhat.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * 3. Neither the name of the author nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38
39 /*
40    Socket wrapper library. Passes all socket communication over
41    unix domain sockets if the environment variable SOCKET_WRAPPER_DIR
42    is set.
43 */
44
45 #include "config.h"
46
47 #include <sys/types.h>
48 #include <sys/time.h>
49 #include <sys/stat.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #ifdef HAVE_SYS_FILIO_H
53 #include <sys/filio.h>
54 #endif
55 #ifdef HAVE_SYS_SIGNALFD_H
56 #include <sys/signalfd.h>
57 #endif
58 #ifdef HAVE_SYS_EVENTFD_H
59 #include <sys/eventfd.h>
60 #endif
61 #ifdef HAVE_SYS_TIMERFD_H
62 #include <sys/timerfd.h>
63 #endif
64 #include <sys/uio.h>
65 #include <errno.h>
66 #include <sys/un.h>
67 #include <netinet/in.h>
68 #include <netinet/tcp.h>
69 #ifdef HAVE_NETINET_TCP_FSM_H
70 #include <netinet/tcp_fsm.h>
71 #endif
72 #include <arpa/inet.h>
73 #include <fcntl.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <stdio.h>
77 #include <stdint.h>
78 #include <stdarg.h>
79 #include <stdbool.h>
80 #include <unistd.h>
81 #ifdef HAVE_GNU_LIB_NAMES_H
82 #include <gnu/lib-names.h>
83 #endif
84 #ifdef HAVE_RPC_RPC_H
85 #include <rpc/rpc.h>
86 #endif
87 #include <pthread.h>
88
89 #include "socket_wrapper.h"
90
/*
 * Log levels understood by swrap_log(). A message is printed when its
 * level is less than or equal to the value of SOCKET_WRAPPER_DEBUGLEVEL.
 */
enum swrap_dbglvl_e {
        SWRAP_LOG_ERROR = 0,
        SWRAP_LOG_WARN  = 1,
        SWRAP_LOG_DEBUG = 2,
        SWRAP_LOG_TRACE = 3
};
97
/* GCC can type-check printf-style arguments when told about the format. */
#if defined(HAVE_FUNCTION_ATTRIBUTE_FORMAT)
# define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b)))
#else
# define PRINTF_ATTRIBUTE(a,b)
#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */

/* Marks a function to be run at load time, where supported. */
#if defined(HAVE_CONSTRUCTOR_ATTRIBUTE)
# define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor))
#else
# define CONSTRUCTOR_ATTRIBUTE
#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */

/* Marks a function to be run at unload time, where supported. */
#if defined(HAVE_DESTRUCTOR_ATTRIBUTE)
# define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor))
#else
# define DESTRUCTOR_ATTRIBUTE
#endif /* HAVE_DESTRUCTOR_ATTRIBUTE */

/* Annotation for an intentional switch fall-through; a no-op otherwise. */
#if !defined(FALL_THROUGH)
# if defined(HAVE_FALLTHROUGH_ATTRIBUTE)
#  define FALL_THROUGH __attribute__ ((fallthrough))
# else /* HAVE_FALLTHROUGH_ATTRIBUTE */
#  define FALL_THROUGH ((void)0)
# endif /* HAVE_FALLTHROUGH_ATTRIBUTE */
#endif /* FALL_THROUGH */

/* Excludes a function from AddressSanitizer instrumentation. */
#if defined(HAVE_ADDRESS_SANITIZER_ATTRIBUTE)
# define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address))
#else
# define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
#endif /* HAVE_ADDRESS_SANITIZER_ATTRIBUTE */

/* Thread-local storage qualifier; expands to nothing when unavailable. */
#if defined(HAVE_GCC_THREAD_LOCAL_STORAGE)
# define SWRAP_THREAD __thread
#else
# define SWRAP_THREAD
#endif /* HAVE_GCC_THREAD_LOCAL_STORAGE */
136
/* Smaller of two values. Note: both arguments may be evaluated twice. */
#if !defined(MIN)
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif

/* Zero an object given as an lvalue. */
#if !defined(ZERO_STRUCT)
#define ZERO_STRUCT(x) memset((char *)&(x), 0, sizeof(x))
#endif

/* Zero the object a pointer refers to; a NULL pointer is left alone. */
#if !defined(ZERO_STRUCTP)
#define ZERO_STRUCTP(x) do { \
                if ((x) != NULL) { \
                        memset((char *)(x), 0, sizeof(*(x))); \
                } \
        } while(0)
#endif

/* free() a pointer and reset it to NULL so it cannot be reused. */
#if !defined(SAFE_FREE)
#define SAFE_FREE(x) do { \
                if ((x) != NULL) { \
                        free(x); \
                        (x) = NULL; \
                } \
        } while(0)
#endif

/* Strip a const qualifier by round-tripping through uintptr_t. */
#if !defined(discard_const)
#define discard_const(ptr) ((void *)((uintptr_t)(ptr)))
#endif

/* Same as discard_const(), but yields a pointer to the given type. */
#if !defined(discard_const_p)
#define discard_const_p(type, ptr) ((type *)discard_const(ptr))
#endif

/* Silence "unused parameter/variable" warnings. */
#define UNUSED(x) (void)(x)
165
/*
 * When the platform offers IPV6_PKTINFO but no IPV6_RECVPKTINFO,
 * use the former for both.
 */
#if defined(IPV6_PKTINFO) && !defined(IPV6_RECVPKTINFO)
# define IPV6_RECVPKTINFO IPV6_PKTINFO
#endif /* IPV6_PKTINFO && !IPV6_RECVPKTINFO */

/*
 * On BSD IP_PKTINFO has a different name because during
 * the time when they implemented it, there was no RFC.
 * The name for IPv6 is the same as on Linux.
 */
#if !defined(IP_PKTINFO) && defined(IP_RECVDSTADDR)
# define IP_PKTINFO IP_RECVDSTADDR
#endif /* !IP_PKTINFO && IP_RECVDSTADDR */
182
/* Pass the mutex expression along as text too, so it can be named in logs. */
#define socket_wrapper_init_mutex(m) \
        _socket_wrapper_init_mutex(m, #m)

/*
 * Add new global locks here please.
 *
 * The three macros below must list the same mutexes: SWRAP_REINIT_ALL
 * re-initialises each one (exiting the process on failure),
 * SWRAP_LOCK_ALL takes them in a fixed order and SWRAP_UNLOCK_ALL
 * releases them in exactly the reverse order.
 */
# define SWRAP_REINIT_ALL do { \
        if (socket_wrapper_init_mutex(&sockets_mutex) != 0) exit(-1); \
        if (socket_wrapper_init_mutex(&socket_reset_mutex) != 0) exit(-1); \
        if (socket_wrapper_init_mutex(&first_free_mutex) != 0) exit(-1); \
        if (socket_wrapper_init_mutex(&sockets_si_global) != 0) exit(-1); \
        if (socket_wrapper_init_mutex(&autobind_start_mutex) != 0) exit(-1); \
        if (socket_wrapper_init_mutex(&pcap_dump_mutex) != 0) exit(-1); \
        if (socket_wrapper_init_mutex(&mtu_update_mutex) != 0) exit(-1); \
} while(0)

/* Acquire every global lock; the order here defines the lock hierarchy. */
# define SWRAP_LOCK_ALL do { \
        swrap_mutex_lock(&sockets_mutex); \
        swrap_mutex_lock(&socket_reset_mutex); \
        swrap_mutex_lock(&first_free_mutex); \
        swrap_mutex_lock(&sockets_si_global); \
        swrap_mutex_lock(&autobind_start_mutex); \
        swrap_mutex_lock(&pcap_dump_mutex); \
        swrap_mutex_lock(&mtu_update_mutex); \
} while(0)

/* Release every global lock, last acquired first. */
# define SWRAP_UNLOCK_ALL do { \
        swrap_mutex_unlock(&mtu_update_mutex); \
        swrap_mutex_unlock(&pcap_dump_mutex); \
        swrap_mutex_unlock(&autobind_start_mutex); \
        swrap_mutex_unlock(&sockets_si_global); \
        swrap_mutex_unlock(&first_free_mutex); \
        swrap_mutex_unlock(&socket_reset_mutex); \
        swrap_mutex_unlock(&sockets_mutex); \
} while(0)
224
/*
 * Reinterpret a pointer to a socket_info as a pointer to the
 * socket_info_container that embeds it as its first member.
 *
 * The whole expansion is parenthesised so that the result can be used
 * directly in a postfix expression such as
 * SOCKET_INFO_CONTAINER(si)->meta without the cast binding to the
 * member access instead of to (si).
 */
#define SOCKET_INFO_CONTAINER(si) \
        ((struct socket_info_container *)(si))
227
/*
 * Lock the socket_info 'si'. All socket_info structures currently
 * share the single sockets_si_global mutex; a NULL 'si' aborts the
 * process.
 */
#define SWRAP_LOCK_SI(si) do { \
        struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
        if (sic == NULL) { \
                abort(); \
        } \
        swrap_mutex_lock(&sockets_si_global); \
} while(0)

/* Counterpart of SWRAP_LOCK_SI(); a NULL 'si' aborts the process. */
#define SWRAP_UNLOCK_SI(si) do { \
        struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
        if (sic == NULL) { \
                abort(); \
        } \
        swrap_mutex_unlock(&sockets_si_global); \
} while(0)
245
/*
 * gettimeofday() wrapper: use the one-argument form unless configure
 * found a variant that takes a timezone argument, in which case NULL
 * is passed for it.
 */
#if !defined(HAVE_GETTIMEOFDAY_TZ) && !defined(HAVE_GETTIMEOFDAY_TZ_VOID)
#define swrapGetTimeOfDay(tval) gettimeofday(tval)
#else
#define swrapGetTimeOfDay(tval) gettimeofday(tval,NULL)
#endif
251
/*
 * We need to use a very terse format here as IRIX 6.4 silently
 * truncates names to 16 chars, so if we use a longer name then we
 * can't tell which port a packet came from with recvfrom().
 *
 * With this format (one type character, two hex digits, four hex
 * digits) we have 8 chars left for the directory name.
 */
#define SOCKET_FORMAT "%c%02X%04X"
#define SOCKET_TYPE_CHAR_TCP    'T'
#define SOCKET_TYPE_CHAR_UDP    'U'
#define SOCKET_TYPE_CHAR_TCP_V6 'X'
#define SOCKET_TYPE_CHAR_UDP_V6 'Y'

/*
 * Set the packet MTU to 1500 bytes for stream sockets to make it easier
 * to format PCAP capture files (as the caller will simply continue from
 * here).
 */
#define SOCKET_WRAPPER_MTU_DEFAULT 1500
#define SOCKET_WRAPPER_MTU_MIN 512
#define SOCKET_WRAPPER_MTU_MAX 32768

#define SOCKET_MAX_SOCKETS 1024

/*
 * Maximum number of socket_info structures that can
 * be used. Can be overridden by the environment variable
 * SOCKET_WRAPPER_MAX_SOCKETS.
 */
#define SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT 65535

#define SOCKET_WRAPPER_MAX_SOCKETS_LIMIT 262140

/*
 * This limit is to avoid broadcast sendto() needing to stat too many
 * files. It may be raised (with a performance cost) to up to 254
 * without changing the format above.
 */
#define MAX_WRAPPED_INTERFACES 64
287
/*
 * A socket address of any supported family together with its length.
 * The union lets the same storage be viewed as a generic sockaddr, an
 * IPv4/IPv6 address, a unix domain address or raw sockaddr_storage.
 */
struct swrap_address {
        socklen_t sa_socklen;
        union {
                struct sockaddr s;
                struct sockaddr_in in;
#if defined(HAVE_IPV6)
                struct sockaddr_in6 in6;
#endif /* HAVE_IPV6 */
                struct sockaddr_un un;
                struct sockaddr_storage ss;
        } sa;
};
300
301 static int first_free;
302
303 struct socket_info
304 {
305         /*
306          * Remember to update swrap_unix_scm_right_magic
307          * on any change.
308          */
309
310         int family;
311         int type;
312         int protocol;
313         int bound;
314         int bcast;
315         int is_server;
316         int connected;
317         int defer_connect;
318         int pktinfo;
319         int tcp_nodelay;
320         int listening;
321         int fd_passed;
322
323         /* The unix path so we can unlink it on close() */
324         struct sockaddr_un un_addr;
325
326         struct swrap_address bindname;
327         struct swrap_address myname;
328         struct swrap_address peername;
329
330         struct {
331                 unsigned long pck_snd;
332                 unsigned long pck_rcv;
333         } io;
334 };
335
336 struct socket_info_meta
337 {
338         unsigned int refcount;
339         int next_free;
340         /*
341          * As long as we don't use shared memory
342          * for the sockets array, we use
343          * sockets_si_global as a single mutex.
344          *
345          * pthread_mutex_t mutex;
346          */
347 };
348
349 struct socket_info_container
350 {
351         struct socket_info info;
352         struct socket_info_meta meta;
353 };
354
355 static struct socket_info_container *sockets;
356
357 static size_t socket_info_max = 0;
358
359 /*
360  * Allocate the socket array always on the limit value. We want it to be
361  * at least bigger than the default so if we reach the limit we can
362  * still deal with duplicate fds pointing to the same socket_info.
363  */
364 static size_t socket_fds_max = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
365
366 /* Hash table to map fds to corresponding socket_info index */
367 static int *socket_fds_idx;
368
/*
 * Global locks, declared in the order in which SWRAP_LOCK_ALL
 * acquires them.
 */

/* Mutex to guard the initialization of array of socket_info structures */
static pthread_mutex_t sockets_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mutex to guard the socket reset in swrap_remove_wrapper() */
static pthread_mutex_t socket_reset_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mutex to synchronize access to first free index in socket_info array */
static pthread_mutex_t first_free_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Mutex to synchronize access to socket_info structures.
 * We use a single global mutex in order to avoid leaking
 * ~ 38M copy on write memory per fork.
 * max_sockets=65535 * sizeof(struct socket_info_container)=592 = 38796720
 */
static pthread_mutex_t sockets_si_global = PTHREAD_MUTEX_INITIALIZER;

/* Mutex for synchronizing port selection during swrap_auto_bind() */
static pthread_mutex_t autobind_start_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mutex to synchronize access to packet capture dump file */
static pthread_mutex_t pcap_dump_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mutex for synchronizing mtu value fetch */
static pthread_mutex_t mtu_update_mutex = PTHREAD_MUTEX_INITIALIZER;
394
395 /* Function prototypes */
396
397 #if ! defined(HAVE_CONSTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_INIT)
398 /* xlC and other oldschool compilers support (only) this */
399 #pragma init (swrap_constructor)
400 #endif
401 void swrap_constructor(void) CONSTRUCTOR_ATTRIBUTE;
402 #if ! defined(HAVE_DESTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_FINI)
403 #pragma fini (swrap_destructor)
404 #endif
405 void swrap_destructor(void) DESTRUCTOR_ATTRIBUTE;
406
#ifndef HAVE_GETPROGNAME
/*
 * Fallback for platforms without getprogname().
 *
 * Returns program_invocation_short_name or getexecname() when configure
 * detected one of them, and NULL otherwise.
 */
static const char *getprogname(void)
{
        const char *name = NULL;

#if defined(HAVE_PROGRAM_INVOCATION_SHORT_NAME)
        name = program_invocation_short_name;
#elif defined(HAVE_GETEXECNAME)
        name = getexecname();
#endif /* HAVE_PROGRAM_INVOCATION_SHORT_NAME */

        return name;
}
#endif /* HAVE_GETPROGNAME */
419
420 static void swrap_log(enum swrap_dbglvl_e dbglvl, const char *func, const char *format, ...) PRINTF_ATTRIBUTE(3, 4);
421 # define SWRAP_LOG(dbglvl, ...) swrap_log((dbglvl), __func__, __VA_ARGS__)
422
423 static void swrap_log(enum swrap_dbglvl_e dbglvl,
424                       const char *func,
425                       const char *format, ...)
426 {
427         char buffer[1024];
428         va_list va;
429         const char *d;
430         unsigned int lvl = 0;
431         const char *prefix = "SWRAP";
432         const char *progname = getprogname();
433
434         d = getenv("SOCKET_WRAPPER_DEBUGLEVEL");
435         if (d != NULL) {
436                 lvl = atoi(d);
437         }
438
439         if (lvl < dbglvl) {
440                 return;
441         }
442
443         va_start(va, format);
444         vsnprintf(buffer, sizeof(buffer), format, va);
445         va_end(va);
446
447         switch (dbglvl) {
448                 case SWRAP_LOG_ERROR:
449                         prefix = "SWRAP_ERROR";
450                         break;
451                 case SWRAP_LOG_WARN:
452                         prefix = "SWRAP_WARN";
453                         break;
454                 case SWRAP_LOG_DEBUG:
455                         prefix = "SWRAP_DEBUG";
456                         break;
457                 case SWRAP_LOG_TRACE:
458                         prefix = "SWRAP_TRACE";
459                         break;
460         }
461
462         if (progname == NULL) {
463                 progname = "<unknown>";
464         }
465
466         fprintf(stderr,
467                 "%s[%s (%u)] - %s: %s\n",
468                 prefix,
469                 progname,
470                 (unsigned int)getpid(),
471                 func,
472                 buffer);
473 }
474
475 /*********************************************************
476  * SWRAP LOADING LIBC FUNCTIONS
477  *********************************************************/
478
479 #include <dlfcn.h>
480
/*
 * Function pointer types for the libc entry points that get wrapped.
 * One type named __libc_<function> exists per entry in
 * struct swrap_libc_symbols.
 *
 * The data transfer functions (read, recv, recvfrom, recvmsg, send,
 * sendmsg, sendto) are typed as returning ssize_t, as POSIX declares
 * them. Calling them through a pointer typed as returning int is
 * undefined behaviour and would truncate results larger than INT_MAX
 * where ssize_t is wider than int.
 *
 * NOTE(review): __libc_recvmsg keeps its const-qualified msghdr
 * parameter and __libc_eventfd its int count, because callers outside
 * this block may depend on them - confirm before aligning those with
 * the libc prototypes as well.
 */
#ifdef HAVE_ACCEPT4
typedef int (*__libc_accept4)(int sockfd,
                              struct sockaddr *addr,
                              socklen_t *addrlen,
                              int flags);
#else
typedef int (*__libc_accept)(int sockfd,
                             struct sockaddr *addr,
                             socklen_t *addrlen);
#endif
typedef int (*__libc_bind)(int sockfd,
                           const struct sockaddr *addr,
                           socklen_t addrlen);
typedef int (*__libc_close)(int fd);
#ifdef HAVE___CLOSE_NOCANCEL
typedef int (*__libc___close_nocancel)(int fd);
#endif
typedef int (*__libc_connect)(int sockfd,
                              const struct sockaddr *addr,
                              socklen_t addrlen);
typedef int (*__libc_dup)(int fd);
typedef int (*__libc_dup2)(int oldfd, int newfd);
typedef int (*__libc_fcntl)(int fd, int cmd, ...);
typedef FILE *(*__libc_fopen)(const char *name, const char *mode);
#ifdef HAVE_FOPEN64
typedef FILE *(*__libc_fopen64)(const char *name, const char *mode);
#endif
#ifdef HAVE_EVENTFD
typedef int (*__libc_eventfd)(int count, int flags);
#endif
typedef int (*__libc_getpeername)(int sockfd,
                                  struct sockaddr *addr,
                                  socklen_t *addrlen);
typedef int (*__libc_getsockname)(int sockfd,
                                  struct sockaddr *addr,
                                  socklen_t *addrlen);
typedef int (*__libc_getsockopt)(int sockfd,
                               int level,
                               int optname,
                               void *optval,
                               socklen_t *optlen);
typedef int (*__libc_ioctl)(int d, unsigned long int request, ...);
typedef int (*__libc_listen)(int sockfd, int backlog);
typedef int (*__libc_open)(const char *pathname, int flags, ...);
#ifdef HAVE_OPEN64
typedef int (*__libc_open64)(const char *pathname, int flags, ...);
#endif /* HAVE_OPEN64 */
typedef int (*__libc_openat)(int dirfd, const char *path, int flags, ...);
typedef int (*__libc_pipe)(int pipefd[2]);
typedef ssize_t (*__libc_read)(int fd, void *buf, size_t count);
typedef ssize_t (*__libc_readv)(int fd, const struct iovec *iov, int iovcnt);
typedef ssize_t (*__libc_recv)(int sockfd, void *buf, size_t len, int flags);
typedef ssize_t (*__libc_recvfrom)(int sockfd,
                                   void *buf,
                                   size_t len,
                                   int flags,
                                   struct sockaddr *src_addr,
                                   socklen_t *addrlen);
typedef ssize_t (*__libc_recvmsg)(int sockfd, const struct msghdr *msg, int flags);
typedef ssize_t (*__libc_send)(int sockfd, const void *buf, size_t len, int flags);
typedef ssize_t (*__libc_sendmsg)(int sockfd, const struct msghdr *msg, int flags);
typedef ssize_t (*__libc_sendto)(int sockfd,
                                 const void *buf,
                                 size_t len,
                                 int flags,
                                 const  struct sockaddr *dst_addr,
                                 socklen_t addrlen);
typedef int (*__libc_setsockopt)(int sockfd,
                               int level,
                               int optname,
                               const void *optval,
                               socklen_t optlen);
#ifdef HAVE_SIGNALFD
typedef int (*__libc_signalfd)(int fd, const sigset_t *mask, int flags);
#endif
typedef int (*__libc_socket)(int domain, int type, int protocol);
typedef int (*__libc_socketpair)(int domain, int type, int protocol, int sv[2]);
#ifdef HAVE_TIMERFD_CREATE
typedef int (*__libc_timerfd_create)(int clockid, int flags);
#endif
typedef ssize_t (*__libc_write)(int fd, const void *buf, size_t count);
typedef ssize_t (*__libc_writev)(int fd, const struct iovec *iov, int iovcnt);
563
564 #define SWRAP_SYMBOL_ENTRY(i) \
565         union { \
566                 __libc_##i f; \
567                 void *obj; \
568         } _libc_##i
569
570 struct swrap_libc_symbols {
571 #ifdef HAVE_ACCEPT4
572         SWRAP_SYMBOL_ENTRY(accept4);
573 #else
574         SWRAP_SYMBOL_ENTRY(accept);
575 #endif
576         SWRAP_SYMBOL_ENTRY(bind);
577         SWRAP_SYMBOL_ENTRY(close);
578 #ifdef HAVE___CLOSE_NOCANCEL
579         SWRAP_SYMBOL_ENTRY(__close_nocancel);
580 #endif
581         SWRAP_SYMBOL_ENTRY(connect);
582         SWRAP_SYMBOL_ENTRY(dup);
583         SWRAP_SYMBOL_ENTRY(dup2);
584         SWRAP_SYMBOL_ENTRY(fcntl);
585         SWRAP_SYMBOL_ENTRY(fopen);
586 #ifdef HAVE_FOPEN64
587         SWRAP_SYMBOL_ENTRY(fopen64);
588 #endif
589 #ifdef HAVE_EVENTFD
590         SWRAP_SYMBOL_ENTRY(eventfd);
591 #endif
592         SWRAP_SYMBOL_ENTRY(getpeername);
593         SWRAP_SYMBOL_ENTRY(getsockname);
594         SWRAP_SYMBOL_ENTRY(getsockopt);
595         SWRAP_SYMBOL_ENTRY(ioctl);
596         SWRAP_SYMBOL_ENTRY(listen);
597         SWRAP_SYMBOL_ENTRY(open);
598 #ifdef HAVE_OPEN64
599         SWRAP_SYMBOL_ENTRY(open64);
600 #endif
601         SWRAP_SYMBOL_ENTRY(openat);
602         SWRAP_SYMBOL_ENTRY(pipe);
603         SWRAP_SYMBOL_ENTRY(read);
604         SWRAP_SYMBOL_ENTRY(readv);
605         SWRAP_SYMBOL_ENTRY(recv);
606         SWRAP_SYMBOL_ENTRY(recvfrom);
607         SWRAP_SYMBOL_ENTRY(recvmsg);
608         SWRAP_SYMBOL_ENTRY(send);
609         SWRAP_SYMBOL_ENTRY(sendmsg);
610         SWRAP_SYMBOL_ENTRY(sendto);
611         SWRAP_SYMBOL_ENTRY(setsockopt);
612 #ifdef HAVE_SIGNALFD
613         SWRAP_SYMBOL_ENTRY(signalfd);
614 #endif
615         SWRAP_SYMBOL_ENTRY(socket);
616         SWRAP_SYMBOL_ENTRY(socketpair);
617 #ifdef HAVE_TIMERFD_CREATE
618         SWRAP_SYMBOL_ENTRY(timerfd_create);
619 #endif
620         SWRAP_SYMBOL_ENTRY(write);
621         SWRAP_SYMBOL_ENTRY(writev);
622 };
623
624 struct swrap {
625         struct {
626                 void *handle;
627                 void *socket_handle;
628                 struct swrap_libc_symbols symbols;
629         } libc;
630 };
631
632 static struct swrap swrap;
633
/* Forward declaration; the function is defined elsewhere. */
static char *socket_wrapper_dir(void);

/* Unversioned file name of the C library. */
#define LIBC_NAME "libc.so"
638
/* The libraries symbols can be loaded from. */
enum swrap_lib {
    SWRAP_LIBC = 0,
    SWRAP_LIBSOCKET = 1,
};

/*
 * Return a printable name for 'lib', or "unknown" for a value that is
 * not a member of enum swrap_lib.
 */
static const char *swrap_str_lib(enum swrap_lib lib)
{
        const char *name = "unknown";

        /*
         * No default label on purpose: the compiler can then warn
         * about an enum value that is not handled here.
         */
        switch (lib) {
        case SWRAP_LIBC:
                name = "libc";
                break;
        case SWRAP_LIBSOCKET:
                name = "libsocket";
                break;
        }

        return name;
}
656
657 static void *swrap_load_lib_handle(enum swrap_lib lib)
658 {
659         int flags = RTLD_LAZY;
660         void *handle = NULL;
661         int i;
662
663 #ifdef RTLD_DEEPBIND
664         const char *env_preload = getenv("LD_PRELOAD");
665         const char *env_deepbind = getenv("SOCKET_WRAPPER_DISABLE_DEEPBIND");
666         bool enable_deepbind = true;
667
668         /* Don't do a deepbind if we run with libasan */
669         if (env_preload != NULL && strlen(env_preload) < 1024) {
670                 const char *p = strstr(env_preload, "libasan.so");
671                 if (p != NULL) {
672                         enable_deepbind = false;
673                 }
674         }
675
676         if (env_deepbind != NULL && strlen(env_deepbind) >= 1) {
677                 enable_deepbind = false;
678         }
679
680         if (enable_deepbind) {
681                 flags |= RTLD_DEEPBIND;
682         }
683 #endif
684
685         switch (lib) {
686         case SWRAP_LIBSOCKET:
687 #ifdef HAVE_LIBSOCKET
688                 handle = swrap.libc.socket_handle;
689                 if (handle == NULL) {
690                         for (i = 10; i >= 0; i--) {
691                                 char soname[256] = {0};
692
693                                 snprintf(soname, sizeof(soname), "libsocket.so.%d", i);
694                                 handle = dlopen(soname, flags);
695                                 if (handle != NULL) {
696                                         break;
697                                 }
698                         }
699
700                         swrap.libc.socket_handle = handle;
701                 }
702                 break;
703 #endif
704         case SWRAP_LIBC:
705                 handle = swrap.libc.handle;
706 #ifdef LIBC_SO
707                 if (handle == NULL) {
708                         handle = dlopen(LIBC_SO, flags);
709
710                         swrap.libc.handle = handle;
711                 }
712 #endif
713                 if (handle == NULL) {
714                         for (i = 10; i >= 0; i--) {
715                                 char soname[256] = {0};
716
717                                 snprintf(soname, sizeof(soname), "libc.so.%d", i);
718                                 handle = dlopen(soname, flags);
719                                 if (handle != NULL) {
720                                         break;
721                                 }
722                         }
723
724                         swrap.libc.handle = handle;
725                 }
726                 break;
727         }
728
729         if (handle == NULL) {
730 #ifdef RTLD_NEXT
731                 handle = swrap.libc.handle = swrap.libc.socket_handle = RTLD_NEXT;
732 #else
733                 SWRAP_LOG(SWRAP_LOG_ERROR,
734                           "Failed to dlopen library: %s",
735                           dlerror());
736                 exit(-1);
737 #endif
738         }
739
740         return handle;
741 }
742
743 static void *_swrap_bind_symbol(enum swrap_lib lib, const char *fn_name)
744 {
745         void *handle;
746         void *func;
747
748         handle = swrap_load_lib_handle(lib);
749
750         func = dlsym(handle, fn_name);
751         if (func == NULL) {
752                 SWRAP_LOG(SWRAP_LOG_ERROR,
753                           "Failed to find %s: %s",
754                           fn_name,
755                           dlerror());
756                 exit(-1);
757         }
758
759         SWRAP_LOG(SWRAP_LOG_TRACE,
760                   "Loaded %s from %s",
761                   fn_name,
762                   swrap_str_lib(lib));
763
764         return func;
765 }
766
767 #define swrap_mutex_lock(m) _swrap_mutex_lock(m, #m, __func__, __LINE__)
768 static void _swrap_mutex_lock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
769 {
770         int ret;
771
772         ret = pthread_mutex_lock(mutex);
773         if (ret != 0) {
774                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't lock pthread mutex(%s) - %s",
775                           getpid(), getppid(), caller, line, name, strerror(ret));
776                 abort();
777         }
778 }
779
780 #define swrap_mutex_unlock(m) _swrap_mutex_unlock(m, #m, __func__, __LINE__)
781 static void _swrap_mutex_unlock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
782 {
783         int ret;
784
785         ret = pthread_mutex_unlock(mutex);
786         if (ret != 0) {
787                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't unlock pthread mutex(%s) - %s",
788                           getpid(), getppid(), caller, line, name, strerror(ret));
789                 abort();
790         }
791 }
792
/*
 * Resolve the symbol 'sym_name' from 'lib' and store its address in
 * the matching swrap.libc.symbols._libc_<sym_name> slot.
 *
 * The macro expands to a single statement without a trailing
 * semicolon, so the caller supplies the ';' and the macro is safe to
 * use in an unbraced if/else.
 *
 * NOTE(review): the previous comment stated that these macros race
 * between threads on purpose, to avoid taking a lock each time a
 * symbol is checked. The macro itself neither checks whether the
 * symbol is already bound nor takes a lock, so any serialisation has
 * to come from the caller - confirm against swrap_bind_symbol_all().
 */
#define _swrap_bind_symbol_generic(lib, sym_name) do { \
        swrap.libc.symbols._libc_##sym_name.obj = \
                _swrap_bind_symbol(lib, #sym_name); \
} while(0)

/* Bind 'sym_name' from libc. */
#define swrap_bind_symbol_libc(sym_name) \
        _swrap_bind_symbol_generic(SWRAP_LIBC, sym_name)

/* Bind 'sym_name' from libsocket. */
#define swrap_bind_symbol_libsocket(sym_name) \
        _swrap_bind_symbol_generic(SWRAP_LIBSOCKET, sym_name)

static void swrap_bind_symbol_all(void);
811
812 /****************************************************************************
813  *                               IMPORTANT
814  ****************************************************************************
815  *
816  * Functions especially from libc need to be loaded individually, you can't
817  * load all at once or gdb will segfault at startup. The same applies to
 * valgrind and probably has something to do with the linker.  So we need
 * to load each function at the point it is called the first time.
820  *
821  ****************************************************************************/
822
823 #ifdef HAVE_ACCEPT4
824 static int libc_accept4(int sockfd,
825                         struct sockaddr *addr,
826                         socklen_t *addrlen,
827                         int flags)
828 {
829         swrap_bind_symbol_all();
830
831         return swrap.libc.symbols._libc_accept4.f(sockfd, addr, addrlen, flags);
832 }
833
834 #else /* HAVE_ACCEPT4 */
835
836 static int libc_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
837 {
838         swrap_bind_symbol_all();
839
840         return swrap.libc.symbols._libc_accept.f(sockfd, addr, addrlen);
841 }
842 #endif /* HAVE_ACCEPT4 */
843
844 static int libc_bind(int sockfd,
845                      const struct sockaddr *addr,
846                      socklen_t addrlen)
847 {
848         swrap_bind_symbol_all();
849
850         return swrap.libc.symbols._libc_bind.f(sockfd, addr, addrlen);
851 }
852
853 static int libc_close(int fd)
854 {
855         swrap_bind_symbol_all();
856
857         return swrap.libc.symbols._libc_close.f(fd);
858 }
859
#ifdef HAVE___CLOSE_NOCANCEL
/* Forward to the __close_nocancel() resolved from libc. */
static int libc___close_nocancel(int fd)
{
        int rc;

        swrap_bind_symbol_all();

        rc = swrap.libc.symbols._libc___close_nocancel.f(fd);

        return rc;
}
#endif /* HAVE___CLOSE_NOCANCEL */
868
869 static int libc_connect(int sockfd,
870                         const struct sockaddr *addr,
871                         socklen_t addrlen)
872 {
873         swrap_bind_symbol_all();
874
875         return swrap.libc.symbols._libc_connect.f(sockfd, addr, addrlen);
876 }
877
878 static int libc_dup(int fd)
879 {
880         swrap_bind_symbol_all();
881
882         return swrap.libc.symbols._libc_dup.f(fd);
883 }
884
885 static int libc_dup2(int oldfd, int newfd)
886 {
887         swrap_bind_symbol_all();
888
889         return swrap.libc.symbols._libc_dup2.f(oldfd, newfd);
890 }
891
#ifdef HAVE_EVENTFD
/* Forward to the eventfd() resolved from libc. */
static int libc_eventfd(int count, int flags)
{
        int fd;

        swrap_bind_symbol_all();

        fd = swrap.libc.symbols._libc_eventfd.f(count, flags);

        return fd;
}
#endif
900
901 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
902 static int libc_vfcntl(int fd, int cmd, va_list ap)
903 {
904         void *arg;
905         int rc;
906
907         swrap_bind_symbol_all();
908
909         arg = va_arg(ap, void *);
910
911         rc = swrap.libc.symbols._libc_fcntl.f(fd, cmd, arg);
912
913         return rc;
914 }
915
916 static int libc_getpeername(int sockfd,
917                             struct sockaddr *addr,
918                             socklen_t *addrlen)
919 {
920         swrap_bind_symbol_all();
921
922         return swrap.libc.symbols._libc_getpeername.f(sockfd, addr, addrlen);
923 }
924
925 static int libc_getsockname(int sockfd,
926                             struct sockaddr *addr,
927                             socklen_t *addrlen)
928 {
929         swrap_bind_symbol_all();
930
931         return swrap.libc.symbols._libc_getsockname.f(sockfd, addr, addrlen);
932 }
933
934 static int libc_getsockopt(int sockfd,
935                            int level,
936                            int optname,
937                            void *optval,
938                            socklen_t *optlen)
939 {
940         swrap_bind_symbol_all();
941
942         return swrap.libc.symbols._libc_getsockopt.f(sockfd,
943                                                      level,
944                                                      optname,
945                                                      optval,
946                                                      optlen);
947 }
948
949 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
950 static int libc_vioctl(int d, unsigned long int request, va_list ap)
951 {
952         void *arg;
953         int rc;
954
955         swrap_bind_symbol_all();
956
957         arg = va_arg(ap, void *);
958
959         rc = swrap.libc.symbols._libc_ioctl.f(d, request, arg);
960
961         return rc;
962 }
963
964 static int libc_listen(int sockfd, int backlog)
965 {
966         swrap_bind_symbol_all();
967
968         return swrap.libc.symbols._libc_listen.f(sockfd, backlog);
969 }
970
971 static FILE *libc_fopen(const char *name, const char *mode)
972 {
973         swrap_bind_symbol_all();
974
975         return swrap.libc.symbols._libc_fopen.f(name, mode);
976 }
977
978 #ifdef HAVE_FOPEN64
979 static FILE *libc_fopen64(const char *name, const char *mode)
980 {
981         swrap_bind_symbol_all();
982
983         return swrap.libc.symbols._libc_fopen64.f(name, mode);
984 }
985 #endif /* HAVE_FOPEN64 */
986
987 static int libc_vopen(const char *pathname, int flags, va_list ap)
988 {
989         int mode = 0;
990         int fd;
991
992         swrap_bind_symbol_all();
993
994         if (flags & O_CREAT) {
995                 mode = va_arg(ap, int);
996         }
997         fd = swrap.libc.symbols._libc_open.f(pathname, flags, (mode_t)mode);
998
999         return fd;
1000 }
1001
/*
 * Variadic front end for libc_vopen(): packs the optional arguments
 * into a va_list and delegates.
 */
static int libc_open(const char *pathname, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = libc_vopen(pathname, flags, args);
        va_end(args);

        return result;
}
1013
1014 #ifdef HAVE_OPEN64
1015 static int libc_vopen64(const char *pathname, int flags, va_list ap)
1016 {
1017         int mode = 0;
1018         int fd;
1019
1020         swrap_bind_symbol_all();
1021
1022         if (flags & O_CREAT) {
1023                 mode = va_arg(ap, int);
1024         }
1025         fd = swrap.libc.symbols._libc_open64.f(pathname, flags, (mode_t)mode);
1026
1027         return fd;
1028 }
1029 #endif /* HAVE_OPEN64 */
1030
1031 static int libc_vopenat(int dirfd, const char *path, int flags, va_list ap)
1032 {
1033         int mode = 0;
1034         int fd;
1035
1036         swrap_bind_symbol_all();
1037
1038         if (flags & O_CREAT) {
1039                 mode = va_arg(ap, int);
1040         }
1041         fd = swrap.libc.symbols._libc_openat.f(dirfd,
1042                                                path,
1043                                                flags,
1044                                                (mode_t)mode);
1045
1046         return fd;
1047 }
1048
1049 #if 0
/*
 * Variadic front end for libc_vopenat(): packs the optional arguments
 * into a va_list and delegates. Currently compiled out.
 */
static int libc_openat(int dirfd, const char *path, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = libc_vopenat(dirfd, path, flags, args);
        va_end(args);

        return result;
}
1061 #endif
1062
1063 static int libc_pipe(int pipefd[2])
1064 {
1065         swrap_bind_symbol_all();
1066
1067         return swrap.libc.symbols._libc_pipe.f(pipefd);
1068 }
1069
1070 static int libc_read(int fd, void *buf, size_t count)
1071 {
1072         swrap_bind_symbol_all();
1073
1074         return swrap.libc.symbols._libc_read.f(fd, buf, count);
1075 }
1076
1077 static ssize_t libc_readv(int fd, const struct iovec *iov, int iovcnt)
1078 {
1079         swrap_bind_symbol_all();
1080
1081         return swrap.libc.symbols._libc_readv.f(fd, iov, iovcnt);
1082 }
1083
1084 static int libc_recv(int sockfd, void *buf, size_t len, int flags)
1085 {
1086         swrap_bind_symbol_all();
1087
1088         return swrap.libc.symbols._libc_recv.f(sockfd, buf, len, flags);
1089 }
1090
1091 static int libc_recvfrom(int sockfd,
1092                          void *buf,
1093                          size_t len,
1094                          int flags,
1095                          struct sockaddr *src_addr,
1096                          socklen_t *addrlen)
1097 {
1098         swrap_bind_symbol_all();
1099
1100         return swrap.libc.symbols._libc_recvfrom.f(sockfd,
1101                                                    buf,
1102                                                    len,
1103                                                    flags,
1104                                                    src_addr,
1105                                                    addrlen);
1106 }
1107
1108 static int libc_recvmsg(int sockfd, struct msghdr *msg, int flags)
1109 {
1110         swrap_bind_symbol_all();
1111
1112         return swrap.libc.symbols._libc_recvmsg.f(sockfd, msg, flags);
1113 }
1114
1115 static int libc_send(int sockfd, const void *buf, size_t len, int flags)
1116 {
1117         swrap_bind_symbol_all();
1118
1119         return swrap.libc.symbols._libc_send.f(sockfd, buf, len, flags);
1120 }
1121
1122 static int libc_sendmsg(int sockfd, const struct msghdr *msg, int flags)
1123 {
1124         swrap_bind_symbol_all();
1125
1126         return swrap.libc.symbols._libc_sendmsg.f(sockfd, msg, flags);
1127 }
1128
1129 static int libc_sendto(int sockfd,
1130                        const void *buf,
1131                        size_t len,
1132                        int flags,
1133                        const  struct sockaddr *dst_addr,
1134                        socklen_t addrlen)
1135 {
1136         swrap_bind_symbol_all();
1137
1138         return swrap.libc.symbols._libc_sendto.f(sockfd,
1139                                                  buf,
1140                                                  len,
1141                                                  flags,
1142                                                  dst_addr,
1143                                                  addrlen);
1144 }
1145
1146 static int libc_setsockopt(int sockfd,
1147                            int level,
1148                            int optname,
1149                            const void *optval,
1150                            socklen_t optlen)
1151 {
1152         swrap_bind_symbol_all();
1153
1154         return swrap.libc.symbols._libc_setsockopt.f(sockfd,
1155                                                      level,
1156                                                      optname,
1157                                                      optval,
1158                                                      optlen);
1159 }
1160
1161 #ifdef HAVE_SIGNALFD
1162 static int libc_signalfd(int fd, const sigset_t *mask, int flags)
1163 {
1164         swrap_bind_symbol_all();
1165
1166         return swrap.libc.symbols._libc_signalfd.f(fd, mask, flags);
1167 }
1168 #endif
1169
1170 static int libc_socket(int domain, int type, int protocol)
1171 {
1172         swrap_bind_symbol_all();
1173
1174         return swrap.libc.symbols._libc_socket.f(domain, type, protocol);
1175 }
1176
1177 static int libc_socketpair(int domain, int type, int protocol, int sv[2])
1178 {
1179         swrap_bind_symbol_all();
1180
1181         return swrap.libc.symbols._libc_socketpair.f(domain, type, protocol, sv);
1182 }
1183
1184 #ifdef HAVE_TIMERFD_CREATE
1185 static int libc_timerfd_create(int clockid, int flags)
1186 {
1187         swrap_bind_symbol_all();
1188
1189         return swrap.libc.symbols._libc_timerfd_create.f(clockid, flags);
1190 }
1191 #endif
1192
1193 static ssize_t libc_write(int fd, const void *buf, size_t count)
1194 {
1195         swrap_bind_symbol_all();
1196
1197         return swrap.libc.symbols._libc_write.f(fd, buf, count);
1198 }
1199
1200 static ssize_t libc_writev(int fd, const struct iovec *iov, int iovcnt)
1201 {
1202         swrap_bind_symbol_all();
1203
1204         return swrap.libc.symbols._libc_writev.f(fd, iov, iovcnt);
1205 }
1206
/*
 * Bind every symbol that the libc_*() wrappers in this file call
 * through swrap.libc.symbols.
 *
 * Each symbol is bound with either swrap_bind_symbol_libc() or
 * swrap_bind_symbol_libsocket(); optional symbols are guarded by the
 * matching HAVE_* configuration macro. This function is only meant to
 * be run via pthread_once() from swrap_bind_symbol_all().
 *
 * DO NOT call this function during library initialization!
 */
static void __swrap_bind_symbol_all_once(void)
{
        /* Only one of accept4()/accept() is bound, depending on the platform. */
#ifdef HAVE_ACCEPT4
        swrap_bind_symbol_libsocket(accept4);
#else
        swrap_bind_symbol_libsocket(accept);
#endif
        swrap_bind_symbol_libsocket(bind);
        swrap_bind_symbol_libc(close);
#ifdef HAVE___CLOSE_NOCANCEL
        swrap_bind_symbol_libc(__close_nocancel);
#endif
        swrap_bind_symbol_libsocket(connect);
        swrap_bind_symbol_libc(dup);
        swrap_bind_symbol_libc(dup2);
        swrap_bind_symbol_libc(fcntl);
        swrap_bind_symbol_libc(fopen);
#ifdef HAVE_FOPEN64
        swrap_bind_symbol_libc(fopen64);
#endif
#ifdef HAVE_EVENTFD
        swrap_bind_symbol_libc(eventfd);
#endif
        swrap_bind_symbol_libsocket(getpeername);
        swrap_bind_symbol_libsocket(getsockname);
        swrap_bind_symbol_libsocket(getsockopt);
        swrap_bind_symbol_libc(ioctl);
        swrap_bind_symbol_libsocket(listen);
        swrap_bind_symbol_libc(open);
#ifdef HAVE_OPEN64
        swrap_bind_symbol_libc(open64);
#endif
        swrap_bind_symbol_libc(openat);
        swrap_bind_symbol_libsocket(pipe);
        swrap_bind_symbol_libc(read);
        swrap_bind_symbol_libsocket(readv);
        swrap_bind_symbol_libsocket(recv);
        swrap_bind_symbol_libsocket(recvfrom);
        swrap_bind_symbol_libsocket(recvmsg);
        swrap_bind_symbol_libsocket(send);
        swrap_bind_symbol_libsocket(sendmsg);
        swrap_bind_symbol_libsocket(sendto);
        swrap_bind_symbol_libsocket(setsockopt);
#ifdef HAVE_SIGNALFD
        swrap_bind_symbol_libsocket(signalfd);
#endif
        swrap_bind_symbol_libsocket(socket);
        swrap_bind_symbol_libsocket(socketpair);
#ifdef HAVE_TIMERFD_CREATE
        swrap_bind_symbol_libc(timerfd_create);
#endif
        swrap_bind_symbol_libc(write);
        swrap_bind_symbol_libsocket(writev);
}
1262
1263 static void swrap_bind_symbol_all(void)
1264 {
1265         static pthread_once_t all_symbol_binding_once = PTHREAD_ONCE_INIT;
1266
1267         pthread_once(&all_symbol_binding_once, __swrap_bind_symbol_all_once);
1268 }
1269
1270 /*********************************************************
1271  * SWRAP HELPER FUNCTIONS
1272  *********************************************************/
1273
1274 /*
1275  * We return 127.0.0.0 (default) or 10.53.57.0.
1276  *
1277  * This can be controlled by:
1278  * SOCKET_WRAPPER_IPV4_NETWORK=127.0.0.0 (default)
1279  * or
1280  * SOCKET_WRAPPER_IPV4_NETWORK=10.53.57.0
1281  */
1282 static in_addr_t swrap_ipv4_net(void)
1283 {
1284         static int initialized;
1285         static in_addr_t hv;
1286         const char *net_str = NULL;
1287         struct in_addr nv;
1288         int ret;
1289
1290         if (initialized) {
1291                 return hv;
1292         }
1293         initialized = 1;
1294
1295         net_str = getenv("SOCKET_WRAPPER_IPV4_NETWORK");
1296         if (net_str == NULL) {
1297                 net_str = "127.0.0.0";
1298         }
1299
1300         ret = inet_pton(AF_INET, net_str, &nv);
1301         if (ret <= 0) {
1302                 SWRAP_LOG(SWRAP_LOG_ERROR,
1303                           "INVALID IPv4 Network [%s]",
1304                           net_str);
1305                 abort();
1306         }
1307
1308         hv = ntohl(nv.s_addr);
1309
1310         switch (hv) {
1311         case 0x7f000000:
1312                 /* 127.0.0.0 */
1313                 break;
1314         case 0x0a353900:
1315                 /* 10.53.57.0 */
1316                 break;
1317         default:
1318                 SWRAP_LOG(SWRAP_LOG_ERROR,
1319                           "INVALID IPv4 Network [%s][0x%x] should be "
1320                           "127.0.0.0 or 10.53.57.0",
1321                           net_str, (unsigned)hv);
1322                 abort();
1323         }
1324
1325         return hv;
1326 }
1327
/*
 * Return the broadcast address of the wrapped network in host byte
 * order, i.e. the network with all class A host bits set:
 * 127.255.255.255 or 10.255.255.255.
 */
static in_addr_t swrap_ipv4_bcast(void)
{
        in_addr_t bcast = swrap_ipv4_net();

        bcast |= IN_CLASSA_HOST;

        return bcast;
}
1340
1341 /*
1342  * This returns 127.0.0.${iface} or 10.53.57.${iface}
1343  */
1344 static in_addr_t swrap_ipv4_iface(unsigned int iface)
1345 {
1346         in_addr_t hv;
1347
1348         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1349                 SWRAP_LOG(SWRAP_LOG_ERROR,
1350                           "swrap_ipv4_iface(%u) invalid!",
1351                           iface);
1352                 abort();
1353                 return -1;
1354         }
1355
1356         hv = swrap_ipv4_net();
1357         hv |= iface;
1358
1359         return hv;
1360 }
1361
1362 #ifdef HAVE_IPV6
/*
 * Return a pointer to the base IPv6 address of the wrapped network,
 * FD00::5357:5FXX with the last byte left at zero.
 *
 * The address is parsed on the first call and cached in a
 * function-local static; a parse failure aborts the process.
 */
static const struct in6_addr *swrap_ipv6(void)
{
        static struct in6_addr base_addr;
        static int cache_valid;

        if (!cache_valid) {
                cache_valid = 1;

                if (inet_pton(AF_INET6, "FD00::5357:5F00", &base_addr) <= 0) {
                        abort();
                }
        }

        return &base_addr;
}
1384 #endif
1385
1386 static void set_port(int family, int prt, struct swrap_address *addr)
1387 {
1388         switch (family) {
1389         case AF_INET:
1390                 addr->sa.in.sin_port = htons(prt);
1391                 break;
1392 #ifdef HAVE_IPV6
1393         case AF_INET6:
1394                 addr->sa.in6.sin6_port = htons(prt);
1395                 break;
1396 #endif
1397         }
1398 }
1399
/*
 * Return the size of the sockaddr structure belonging to family, or 0
 * for a family that is not handled here.
 */
static size_t socket_length(int family)
{
        if (family == AF_INET) {
                return sizeof(struct sockaddr_in);
        }
#ifdef HAVE_IPV6
        if (family == AF_INET6) {
                return sizeof(struct sockaddr_in6);
        }
#endif

        return 0;
}
1412
1413 static struct socket_info *swrap_get_socket_info(int si_index)
1414 {
1415         return (struct socket_info *)(&(sockets[si_index].info));
1416 }
1417
1418 static int swrap_get_refcount(struct socket_info *si)
1419 {
1420         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1421         return sic->meta.refcount;
1422 }
1423
1424 static void swrap_inc_refcount(struct socket_info *si)
1425 {
1426         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1427
1428         sic->meta.refcount += 1;
1429 }
1430
1431 static void swrap_dec_refcount(struct socket_info *si)
1432 {
1433         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1434
1435         sic->meta.refcount -= 1;
1436 }
1437
1438 static int swrap_get_next_free(struct socket_info *si)
1439 {
1440         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1441
1442         return sic->meta.next_free;
1443 }
1444
1445 static void swrap_set_next_free(struct socket_info *si, int next_free)
1446 {
1447         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1448
1449         sic->meta.next_free = next_free;
1450 }
1451
1452 static int swrap_un_path(struct sockaddr_un *un,
1453                          const char *swrap_dir,
1454                          char type,
1455                          unsigned int iface,
1456                          unsigned int prt)
1457 {
1458         int ret;
1459
1460         ret = snprintf(un->sun_path,
1461                        sizeof(un->sun_path),
1462                        "%s/"SOCKET_FORMAT,
1463                        swrap_dir,
1464                        type,
1465                        iface,
1466                        prt);
1467         if ((size_t)ret >= sizeof(un->sun_path)) {
1468                 return ENAMETOOLONG;
1469         }
1470
1471         return 0;
1472 }
1473
/*
 * Format the path "<swrap_dir>/EINVAL" into un->sun_path.
 *
 * Returns 0 on success or ENAMETOOLONG when the formatted path does not
 * fit into sun_path (a negative snprintf() result is reported the same
 * way).
 */
static int swrap_un_path_EINVAL(struct sockaddr_un *un,
                                const char *swrap_dir)
{
        const size_t capacity = sizeof(un->sun_path);
        int written;

        written = snprintf(un->sun_path, capacity, "%s/EINVAL", swrap_dir);

        if ((size_t)written < capacity) {
                return 0;
        }

        return ENAMETOOLONG;
}
1490
1491 static bool swrap_dir_usable(const char *swrap_dir)
1492 {
1493         struct sockaddr_un un;
1494         int ret;
1495
1496         ret = swrap_un_path(&un, swrap_dir, SOCKET_TYPE_CHAR_TCP, 0, 0);
1497         if (ret == 0) {
1498                 return true;
1499         }
1500
1501         ret = swrap_un_path_EINVAL(&un, swrap_dir);
1502         if (ret == 0) {
1503                 return true;
1504         }
1505
1506         return false;
1507 }
1508
1509 static char *socket_wrapper_dir(void)
1510 {
1511         char *swrap_dir = NULL;
1512         char *s = getenv("SOCKET_WRAPPER_DIR");
1513         char *t;
1514         bool ok;
1515
1516         if (s == NULL || s[0] == '\0') {
1517                 SWRAP_LOG(SWRAP_LOG_WARN, "SOCKET_WRAPPER_DIR not set");
1518                 return NULL;
1519         }
1520
1521         swrap_dir = realpath(s, NULL);
1522         if (swrap_dir == NULL) {
1523                 SWRAP_LOG(SWRAP_LOG_ERROR,
1524                           "Unable to resolve socket_wrapper dir path: %s - %s",
1525                           s,
1526                           strerror(errno));
1527                 abort();
1528         }
1529
1530         ok = swrap_dir_usable(swrap_dir);
1531         if (ok) {
1532                 goto done;
1533         }
1534
1535         free(swrap_dir);
1536
1537         ok = swrap_dir_usable(s);
1538         if (!ok) {
1539                 SWRAP_LOG(SWRAP_LOG_ERROR, "SOCKET_WRAPPER_DIR is too long");
1540                 abort();
1541         }
1542
1543         t = getenv("SOCKET_WRAPPER_DIR_ALLOW_ORIG");
1544         if (t == NULL) {
1545                 SWRAP_LOG(SWRAP_LOG_ERROR,
1546                           "realpath(SOCKET_WRAPPER_DIR) too long and "
1547                           "SOCKET_WRAPPER_DIR_ALLOW_ORIG not set");
1548                 abort();
1549
1550         }
1551
1552         swrap_dir = strdup(s);
1553         if (swrap_dir == NULL) {
1554                 SWRAP_LOG(SWRAP_LOG_ERROR,
1555                           "Unable to duplicate socket_wrapper dir path");
1556                 abort();
1557         }
1558
1559         SWRAP_LOG(SWRAP_LOG_WARN,
1560                   "realpath(SOCKET_WRAPPER_DIR) too long, "
1561                   "using original SOCKET_WRAPPER_DIR\n");
1562
1563 done:
1564         SWRAP_LOG(SWRAP_LOG_TRACE, "socket_wrapper_dir: %s", swrap_dir);
1565         return swrap_dir;
1566 }
1567
1568 static unsigned int socket_wrapper_mtu(void)
1569 {
1570         static unsigned int max_mtu = 0;
1571         unsigned int tmp;
1572         const char *s;
1573         char *endp;
1574
1575         swrap_mutex_lock(&mtu_update_mutex);
1576
1577         if (max_mtu != 0) {
1578                 goto done;
1579         }
1580
1581         max_mtu = SOCKET_WRAPPER_MTU_DEFAULT;
1582
1583         s = getenv("SOCKET_WRAPPER_MTU");
1584         if (s == NULL) {
1585                 goto done;
1586         }
1587
1588         tmp = strtol(s, &endp, 10);
1589         if (s == endp) {
1590                 goto done;
1591         }
1592
1593         if (tmp < SOCKET_WRAPPER_MTU_MIN || tmp > SOCKET_WRAPPER_MTU_MAX) {
1594                 goto done;
1595         }
1596         max_mtu = tmp;
1597
1598 done:
1599         swrap_mutex_unlock(&mtu_update_mutex);
1600         return max_mtu;
1601 }
1602
/*
 * Initialize the mutex m as an error-checking mutex
 * (PTHREAD_MUTEX_ERRORCHECK).
 *
 * name is only used to identify the mutex in error log messages.
 *
 * Returns 0 on success, otherwise the non-zero result of the first
 * pthread call that failed (which is also logged).
 */
static int _socket_wrapper_init_mutex(pthread_mutex_t *m, const char *name)
{
        pthread_mutexattr_t ma;
        bool need_destroy = false;
        int ret = 0;

/*
 * Run cmd, store its result in ret and, when it is non-zero, log the
 * stringified call and jump to the cleanup at done.
 */
#define __CHECK(cmd) do { \
        ret = cmd; \
        if (ret != 0) { \
                SWRAP_LOG(SWRAP_LOG_ERROR, \
                          "%s: %s - failed %d", \
                          name, #cmd, ret); \
                goto done; \
        } \
} while(0)

        /* Start from the static initializer before the real init below. */
        *m = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
        __CHECK(pthread_mutexattr_init(&ma));
        /* From here on the attribute object has to be destroyed again. */
        need_destroy = true;
        __CHECK(pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK));
        __CHECK(pthread_mutex_init(m, &ma));
done:
        if (need_destroy) {
                pthread_mutexattr_destroy(&ma);
        }
        return ret;
}
1630
1631 static size_t socket_wrapper_max_sockets(void)
1632 {
1633         const char *s;
1634         size_t tmp;
1635         char *endp;
1636
1637         if (socket_info_max != 0) {
1638                 return socket_info_max;
1639         }
1640
1641         socket_info_max = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1642
1643         s = getenv("SOCKET_WRAPPER_MAX_SOCKETS");
1644         if (s == NULL || s[0] == '\0') {
1645                 goto done;
1646         }
1647
1648         tmp = strtoul(s, &endp, 10);
1649         if (s == endp) {
1650                 goto done;
1651         }
1652         if (tmp == 0) {
1653                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1654                 SWRAP_LOG(SWRAP_LOG_ERROR,
1655                           "Invalid number of sockets specified, "
1656                           "using default (%zu)",
1657                           tmp);
1658         }
1659
1660         if (tmp > SOCKET_WRAPPER_MAX_SOCKETS_LIMIT) {
1661                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
1662                 SWRAP_LOG(SWRAP_LOG_ERROR,
1663                           "Invalid number of sockets specified, "
1664                           "using maximum (%zu).",
1665                           tmp);
1666         }
1667
1668         socket_info_max = tmp;
1669
1670 done:
1671         return socket_info_max;
1672 }
1673
1674 static void socket_wrapper_init_fds_idx(void)
1675 {
1676         int *tmp = NULL;
1677         size_t i;
1678
1679         if (socket_fds_idx != NULL) {
1680                 return;
1681         }
1682
1683         tmp = (int *)calloc(socket_fds_max, sizeof(int));
1684         if (tmp == NULL) {
1685                 SWRAP_LOG(SWRAP_LOG_ERROR,
1686                           "Failed to allocate socket fds index array: %s",
1687                           strerror(errno));
1688                 exit(-1);
1689         }
1690
1691         for (i = 0; i < socket_fds_max; i++) {
1692                 tmp[i] = -1;
1693         }
1694
1695         socket_fds_idx = tmp;
1696 }
1697
1698 static void socket_wrapper_init_sockets(void)
1699 {
1700         size_t max_sockets;
1701         size_t i;
1702         int ret = 0;
1703
1704         swrap_bind_symbol_all();
1705
1706         swrap_mutex_lock(&sockets_mutex);
1707
1708         if (sockets != NULL) {
1709                 swrap_mutex_unlock(&sockets_mutex);
1710                 return;
1711         }
1712
1713         SWRAP_LOG(SWRAP_LOG_DEBUG,
1714                   "SOCKET_WRAPPER_PACKAGE[%s] SOCKET_WRAPPER_VERSION[%s]",
1715                   SOCKET_WRAPPER_PACKAGE, SOCKET_WRAPPER_VERSION);
1716
1717         /*
1718          * Intialize the static cache early before
1719          * any thread is able to start.
1720          */
1721         (void)swrap_ipv4_net();
1722
1723         socket_wrapper_init_fds_idx();
1724
1725         /* Needs to be called inside the sockets_mutex lock here. */
1726         max_sockets = socket_wrapper_max_sockets();
1727
1728         sockets = (struct socket_info_container *)calloc(max_sockets,
1729                                         sizeof(struct socket_info_container));
1730
1731         if (sockets == NULL) {
1732                 SWRAP_LOG(SWRAP_LOG_ERROR,
1733                           "Failed to allocate sockets array: %s",
1734                           strerror(errno));
1735                 swrap_mutex_unlock(&sockets_mutex);
1736                 exit(-1);
1737         }
1738
1739         swrap_mutex_lock(&first_free_mutex);
1740         swrap_mutex_lock(&sockets_si_global);
1741
1742         first_free = 0;
1743
1744         for (i = 0; i < max_sockets; i++) {
1745                 swrap_set_next_free(&sockets[i].info, i+1);
1746         }
1747
1748         /* mark the end of the free list */
1749         swrap_set_next_free(&sockets[max_sockets-1].info, -1);
1750
1751         swrap_mutex_unlock(&sockets_si_global);
1752         swrap_mutex_unlock(&first_free_mutex);
1753         swrap_mutex_unlock(&sockets_mutex);
1754         if (ret != 0) {
1755                 exit(-1);
1756         }
1757 }
1758
/*
 * Tell whether socket wrapping is active.
 *
 * Wrapping is active when socket_wrapper_dir() yields a directory. In
 * that case the socket tables are initialized before returning true.
 */
bool socket_wrapper_enabled(void)
{
        char *dir = socket_wrapper_dir();
        const bool enabled = (dir != NULL);

        if (!enabled) {
                return false;
        }

        /* Only the existence of the directory matters here. */
        SAFE_FREE(dir);

        socket_wrapper_init_sockets();

        return true;
}
1773
1774 static unsigned int socket_wrapper_default_iface(void)
1775 {
1776         const char *s = getenv("SOCKET_WRAPPER_DEFAULT_IFACE");
1777         if (s) {
1778                 unsigned int iface;
1779                 if (sscanf(s, "%u", &iface) == 1) {
1780                         if (iface >= 1 && iface <= MAX_WRAPPED_INTERFACES) {
1781                                 return iface;
1782                         }
1783                 }
1784         }
1785
1786         return 1;/* 127.0.0.1 */
1787 }
1788
1789 static void set_socket_info_index(int fd, int idx)
1790 {
1791         SWRAP_LOG(SWRAP_LOG_TRACE,
1792                   "fd=%d idx=%d",
1793                   fd, idx);
1794         socket_fds_idx[fd] = idx;
1795         /* This builtin issues a full memory barrier. */
1796         __sync_synchronize();
1797 }
1798
1799 static void reset_socket_info_index(int fd)
1800 {
1801         SWRAP_LOG(SWRAP_LOG_TRACE,
1802                   "fd=%d idx=%d",
1803                   fd, -1);
1804         set_socket_info_index(fd, -1);
1805 }
1806
1807 static int find_socket_info_index(int fd)
1808 {
1809         if (fd < 0) {
1810                 return -1;
1811         }
1812
1813         if (socket_fds_idx == NULL) {
1814                 return -1;
1815         }
1816
1817         if ((size_t)fd >= socket_fds_max) {
1818                 /*
1819                  * Do not add a log here as some applications do stupid things
1820                  * like:
1821                  *
1822                  *     for (fd = 0; fd <= getdtablesize(); fd++) {
1823                  *         close(fd)
1824                  *     };
1825                  *
1826                  * This would produce millions of lines of debug messages.
1827                  */
1828 #if 0
1829                 SWRAP_LOG(SWRAP_LOG_ERROR,
1830                           "Looking for a socket info for the fd %d is over the "
1831                           "max socket index limit of %zu.",
1832                           fd,
1833                           socket_fds_max);
1834 #endif
1835                 return -1;
1836         }
1837
1838         /* This builtin issues a full memory barrier. */
1839         __sync_synchronize();
1840         return socket_fds_idx[fd];
1841 }
1842
1843 static int swrap_add_socket_info(const struct socket_info *si_input)
1844 {
1845         struct socket_info *si = NULL;
1846         int si_index = -1;
1847
1848         if (si_input == NULL) {
1849                 errno = EINVAL;
1850                 return -1;
1851         }
1852
1853         swrap_mutex_lock(&first_free_mutex);
1854         if (first_free == -1) {
1855                 errno = ENFILE;
1856                 goto out;
1857         }
1858
1859         si_index = first_free;
1860         si = swrap_get_socket_info(si_index);
1861
1862         SWRAP_LOCK_SI(si);
1863
1864         first_free = swrap_get_next_free(si);
1865         *si = *si_input;
1866         swrap_inc_refcount(si);
1867
1868         SWRAP_UNLOCK_SI(si);
1869
1870 out:
1871         swrap_mutex_unlock(&first_free_mutex);
1872
1873         return si_index;
1874 }
1875
1876 static int swrap_create_socket(struct socket_info *si, int fd)
1877 {
1878         int idx;
1879
1880         if ((size_t)fd >= socket_fds_max) {
1881                 SWRAP_LOG(SWRAP_LOG_ERROR,
1882                           "The max socket index limit of %zu has been reached, "
1883                           "trying to add %d",
1884                           socket_fds_max,
1885                           fd);
1886                 errno = EMFILE;
1887                 return -1;
1888         }
1889
1890         idx = swrap_add_socket_info(si);
1891         if (idx == -1) {
1892                 return -1;
1893         }
1894
1895         set_socket_info_index(fd, idx);
1896
1897         return idx;
1898 }
1899
1900 static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, socklen_t *len)
1901 {
1902         unsigned int iface;
1903         unsigned int prt;
1904         const char *p;
1905         char type;
1906
1907         p = strrchr(un->sun_path, '/');
1908         if (p) p++; else p = un->sun_path;
1909
1910         if (sscanf(p, SOCKET_FORMAT, &type, &iface, &prt) != 3) {
1911                 SWRAP_LOG(SWRAP_LOG_ERROR, "sun_path[%s] p[%s]",
1912                           un->sun_path, p);
1913                 errno = EINVAL;
1914                 return -1;
1915         }
1916
1917         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1918                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1919                           type, iface, prt);
1920                 errno = EINVAL;
1921                 return -1;
1922         }
1923
1924         if (prt > 0xFFFF) {
1925                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1926                           type, iface, prt);
1927                 errno = EINVAL;
1928                 return -1;
1929         }
1930
1931         SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u",
1932                   type, iface, prt);
1933
1934         switch(type) {
1935         case SOCKET_TYPE_CHAR_TCP:
1936         case SOCKET_TYPE_CHAR_UDP: {
1937                 struct sockaddr_in *in2 = (struct sockaddr_in *)(void *)in;
1938
1939                 if ((*len) < sizeof(*in2)) {
1940                         SWRAP_LOG(SWRAP_LOG_ERROR,
1941                                   "V4: *len(%zu) < sizeof(*in2)=%zu",
1942                                   (size_t)*len, sizeof(*in2));
1943                         errno = EINVAL;
1944                         return -1;
1945                 }
1946
1947                 memset(in2, 0, sizeof(*in2));
1948                 in2->sin_family = AF_INET;
1949                 in2->sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
1950                 in2->sin_port = htons(prt);
1951
1952                 *len = sizeof(*in2);
1953                 break;
1954         }
1955 #ifdef HAVE_IPV6
1956         case SOCKET_TYPE_CHAR_TCP_V6:
1957         case SOCKET_TYPE_CHAR_UDP_V6: {
1958                 struct sockaddr_in6 *in2 = (struct sockaddr_in6 *)(void *)in;
1959
1960                 if ((*len) < sizeof(*in2)) {
1961                         SWRAP_LOG(SWRAP_LOG_ERROR,
1962                                   "V6: *len(%zu) < sizeof(*in2)=%zu",
1963                                   (size_t)*len, sizeof(*in2));
1964                         SWRAP_LOG(SWRAP_LOG_ERROR, "LINE:%d", __LINE__);
1965                         errno = EINVAL;
1966                         return -1;
1967                 }
1968
1969                 memset(in2, 0, sizeof(*in2));
1970                 in2->sin6_family = AF_INET6;
1971                 in2->sin6_addr = *swrap_ipv6();
1972                 in2->sin6_addr.s6_addr[15] = iface;
1973                 in2->sin6_port = htons(prt);
1974
1975                 *len = sizeof(*in2);
1976                 break;
1977         }
1978 #endif
1979         default:
1980                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1981                           type, iface, prt);
1982                 errno = EINVAL;
1983                 return -1;
1984         }
1985
1986         return 0;
1987 }
1988
1989 static int convert_in_un_remote(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
1990                                 int *bcast)
1991 {
1992         char type = '\0';
1993         unsigned int prt;
1994         unsigned int iface;
1995         int is_bcast = 0;
1996         char *swrap_dir = NULL;
1997
1998         if (bcast) *bcast = 0;
1999
2000         switch (inaddr->sa_family) {
2001         case AF_INET: {
2002                 const struct sockaddr_in *in =
2003                     (const struct sockaddr_in *)(const void *)inaddr;
2004                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2005                 char u_type = '\0';
2006                 char b_type = '\0';
2007                 char a_type = '\0';
2008                 const unsigned int sw_net_addr = swrap_ipv4_net();
2009                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2010
2011                 switch (si->type) {
2012                 case SOCK_STREAM:
2013                         u_type = SOCKET_TYPE_CHAR_TCP;
2014                         break;
2015                 case SOCK_DGRAM:
2016                         u_type = SOCKET_TYPE_CHAR_UDP;
2017                         a_type = SOCKET_TYPE_CHAR_UDP;
2018                         b_type = SOCKET_TYPE_CHAR_UDP;
2019                         break;
2020                 default:
2021                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2022                         errno = ESOCKTNOSUPPORT;
2023                         return -1;
2024                 }
2025
2026                 prt = ntohs(in->sin_port);
2027                 if (a_type && addr == 0xFFFFFFFF) {
2028                         /* 255.255.255.255 only udp */
2029                         is_bcast = 2;
2030                         type = a_type;
2031                         iface = socket_wrapper_default_iface();
2032                 } else if (b_type && addr == sw_bcast_addr) {
2033                         /*
2034                          * 127.255.255.255
2035                          * or
2036                          * 10.255.255.255
2037                          * only udp
2038                          */
2039                         is_bcast = 1;
2040                         type = b_type;
2041                         iface = socket_wrapper_default_iface();
2042                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2043                         /* 127.0.0.X or 10.53.57.X */
2044                         is_bcast = 0;
2045                         type = u_type;
2046                         iface = (addr & 0x000000FF);
2047                 } else {
2048                         char str[256] = {0,};
2049                         inet_ntop(inaddr->sa_family,
2050                                   &in->sin_addr,
2051                                   str, sizeof(str));
2052                         SWRAP_LOG(SWRAP_LOG_WARN,
2053                                   "str[%s] prt[%u]",
2054                                   str, (unsigned)prt);
2055                         errno = ENETUNREACH;
2056                         return -1;
2057                 }
2058                 if (bcast) *bcast = is_bcast;
2059                 break;
2060         }
2061 #ifdef HAVE_IPV6
2062         case AF_INET6: {
2063                 const struct sockaddr_in6 *in =
2064                     (const struct sockaddr_in6 *)(const void *)inaddr;
2065                 struct in6_addr cmp1, cmp2;
2066
2067                 switch (si->type) {
2068                 case SOCK_STREAM:
2069                         type = SOCKET_TYPE_CHAR_TCP_V6;
2070                         break;
2071                 case SOCK_DGRAM:
2072                         type = SOCKET_TYPE_CHAR_UDP_V6;
2073                         break;
2074                 default:
2075                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2076                         errno = ESOCKTNOSUPPORT;
2077                         return -1;
2078                 }
2079
2080                 /* XXX no multicast/broadcast */
2081
2082                 prt = ntohs(in->sin6_port);
2083
2084                 cmp1 = *swrap_ipv6();
2085                 cmp2 = in->sin6_addr;
2086                 cmp2.s6_addr[15] = 0;
2087                 if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2088                         iface = in->sin6_addr.s6_addr[15];
2089                 } else {
2090                         char str[256] = {0,};
2091                         inet_ntop(inaddr->sa_family,
2092                                   &in->sin6_addr,
2093                                   str, sizeof(str));
2094                         SWRAP_LOG(SWRAP_LOG_WARN,
2095                                   "str[%s] prt[%u]",
2096                                   str, (unsigned)prt);
2097                         errno = ENETUNREACH;
2098                         return -1;
2099                 }
2100
2101                 break;
2102         }
2103 #endif
2104         default:
2105                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family!");
2106                 errno = ENETUNREACH;
2107                 return -1;
2108         }
2109
2110         if (prt == 0) {
2111                 SWRAP_LOG(SWRAP_LOG_WARN, "Port not set");
2112                 errno = EINVAL;
2113                 return -1;
2114         }
2115
2116         swrap_dir = socket_wrapper_dir();
2117         if (swrap_dir == NULL) {
2118                 errno = EINVAL;
2119                 return -1;
2120         }
2121
2122         if (is_bcast) {
2123                 swrap_un_path_EINVAL(un, swrap_dir);
2124                 SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2125                 SAFE_FREE(swrap_dir);
2126                 /* the caller need to do more processing */
2127                 return 0;
2128         }
2129
2130         swrap_un_path(un, swrap_dir, type, iface, prt);
2131         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2132
2133         SAFE_FREE(swrap_dir);
2134
2135         return 0;
2136 }
2137
2138 static int convert_in_un_alloc(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
2139                                int *bcast)
2140 {
2141         char type = '\0';
2142         unsigned int prt;
2143         unsigned int iface;
2144         struct stat st;
2145         int is_bcast = 0;
2146         char *swrap_dir = NULL;
2147
2148         if (bcast) *bcast = 0;
2149
2150         switch (si->family) {
2151         case AF_INET: {
2152                 const struct sockaddr_in *in =
2153                     (const struct sockaddr_in *)(const void *)inaddr;
2154                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2155                 char u_type = '\0';
2156                 char d_type = '\0';
2157                 char b_type = '\0';
2158                 char a_type = '\0';
2159                 const unsigned int sw_net_addr = swrap_ipv4_net();
2160                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2161
2162                 prt = ntohs(in->sin_port);
2163
2164                 switch (si->type) {
2165                 case SOCK_STREAM:
2166                         u_type = SOCKET_TYPE_CHAR_TCP;
2167                         d_type = SOCKET_TYPE_CHAR_TCP;
2168                         break;
2169                 case SOCK_DGRAM:
2170                         u_type = SOCKET_TYPE_CHAR_UDP;
2171                         d_type = SOCKET_TYPE_CHAR_UDP;
2172                         a_type = SOCKET_TYPE_CHAR_UDP;
2173                         b_type = SOCKET_TYPE_CHAR_UDP;
2174                         break;
2175                 default:
2176                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2177                         errno = ESOCKTNOSUPPORT;
2178                         return -1;
2179                 }
2180
2181                 if (addr == 0) {
2182                         /* 0.0.0.0 */
2183                         is_bcast = 0;
2184                         type = d_type;
2185                         iface = socket_wrapper_default_iface();
2186                 } else if (a_type && addr == 0xFFFFFFFF) {
2187                         /* 255.255.255.255 only udp */
2188                         is_bcast = 2;
2189                         type = a_type;
2190                         iface = socket_wrapper_default_iface();
2191                 } else if (b_type && addr == sw_bcast_addr) {
2192                         /* 127.255.255.255 only udp */
2193                         is_bcast = 1;
2194                         type = b_type;
2195                         iface = socket_wrapper_default_iface();
2196                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2197                         /* 127.0.0.X */
2198                         is_bcast = 0;
2199                         type = u_type;
2200                         iface = (addr & 0x000000FF);
2201                 } else {
2202                         errno = EADDRNOTAVAIL;
2203                         return -1;
2204                 }
2205
2206                 /* Store the bind address for connect() */
2207                 if (si->bindname.sa_socklen == 0) {
2208                         struct sockaddr_in bind_in;
2209                         socklen_t blen = sizeof(struct sockaddr_in);
2210
2211                         ZERO_STRUCT(bind_in);
2212                         bind_in.sin_family = in->sin_family;
2213                         bind_in.sin_port = in->sin_port;
2214                         bind_in.sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
2215                         si->bindname.sa_socklen = blen;
2216                         memcpy(&si->bindname.sa.in, &bind_in, blen);
2217                 }
2218
2219                 break;
2220         }
2221 #ifdef HAVE_IPV6
2222         case AF_INET6: {
2223                 const struct sockaddr_in6 *in =
2224                     (const struct sockaddr_in6 *)(const void *)inaddr;
2225                 struct in6_addr cmp1, cmp2;
2226
2227                 switch (si->type) {
2228                 case SOCK_STREAM:
2229                         type = SOCKET_TYPE_CHAR_TCP_V6;
2230                         break;
2231                 case SOCK_DGRAM:
2232                         type = SOCKET_TYPE_CHAR_UDP_V6;
2233                         break;
2234                 default:
2235                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2236                         errno = ESOCKTNOSUPPORT;
2237                         return -1;
2238                 }
2239
2240                 /* XXX no multicast/broadcast */
2241
2242                 prt = ntohs(in->sin6_port);
2243
2244                 cmp1 = *swrap_ipv6();
2245                 cmp2 = in->sin6_addr;
2246                 cmp2.s6_addr[15] = 0;
2247                 if (IN6_IS_ADDR_UNSPECIFIED(&in->sin6_addr)) {
2248                         iface = socket_wrapper_default_iface();
2249                 } else if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2250                         iface = in->sin6_addr.s6_addr[15];
2251                 } else {
2252                         errno = EADDRNOTAVAIL;
2253                         return -1;
2254                 }
2255
2256                 /* Store the bind address for connect() */
2257                 if (si->bindname.sa_socklen == 0) {
2258                         struct sockaddr_in6 bind_in;
2259                         socklen_t blen = sizeof(struct sockaddr_in6);
2260
2261                         ZERO_STRUCT(bind_in);
2262                         bind_in.sin6_family = in->sin6_family;
2263                         bind_in.sin6_port = in->sin6_port;
2264
2265                         bind_in.sin6_addr = *swrap_ipv6();
2266                         bind_in.sin6_addr.s6_addr[15] = iface;
2267
2268                         memcpy(&si->bindname.sa.in6, &bind_in, blen);
2269                         si->bindname.sa_socklen = blen;
2270                 }
2271
2272                 break;
2273         }
2274 #endif
2275         default:
2276                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2277                 errno = EADDRNOTAVAIL;
2278                 return -1;
2279         }
2280
2281
2282         if (bcast) *bcast = is_bcast;
2283
2284         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
2285                 errno = EINVAL;
2286                 return -1;
2287         }
2288
2289         swrap_dir = socket_wrapper_dir();
2290         if (swrap_dir == NULL) {
2291                 errno = EINVAL;
2292                 return -1;
2293         }
2294
2295         if (prt == 0) {
2296                 /* handle auto-allocation of ephemeral ports */
2297                 for (prt = 5001; prt < 10000; prt++) {
2298                         swrap_un_path(un, swrap_dir, type, iface, prt);
2299                         if (stat(un->sun_path, &st) == 0) continue;
2300
2301                         set_port(si->family, prt, &si->myname);
2302                         set_port(si->family, prt, &si->bindname);
2303
2304                         break;
2305                 }
2306
2307                 if (prt == 10000) {
2308                         errno = ENFILE;
2309                         SAFE_FREE(swrap_dir);
2310                         return -1;
2311                 }
2312         }
2313
2314         swrap_un_path(un, swrap_dir, type, iface, prt);
2315         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2316
2317         SAFE_FREE(swrap_dir);
2318
2319         return 0;
2320 }
2321
/*
 * Look up the socket_info tracked for a file descriptor.
 *
 * Returns NULL when find_socket_info_index() reports -1 for fd, otherwise
 * whatever swrap_get_socket_info() returns for the index found.
 */
static struct socket_info *find_socket_info(int fd)
{
        const int si_index = find_socket_info_index(fd);

        return (si_index == -1) ? NULL : swrap_get_socket_info(si_index);
}
2332
#if 0 /* FIXME */
/*
 * Compiled out. Walks the tracked sockets and reports whether one of them
 * already has the given address and port as its local name.
 *
 * Returns false for unsupported families and for addresses shorter than
 * the sockaddr of their family.
 */
static bool check_addr_port_in_use(const struct sockaddr *sa, socklen_t len)
{
        struct socket_info_fd *entry;
        const struct socket_info *prev = NULL;
        size_t min_len;

        /* first catch invalid input */
        if (sa->sa_family == AF_INET) {
                min_len = sizeof(struct sockaddr_in);
#ifdef HAVE_IPV6
        } else if (sa->sa_family == AF_INET6) {
                min_len = sizeof(struct sockaddr_in6);
#endif
        } else {
                return false;
        }
        if (len < min_len) {
                return false;
        }

        for (entry = socket_fds; entry != NULL; entry = entry->next) {
                struct socket_info *cur = swrap_get_socket_info(entry->si_index);

                /* consecutive entries sharing one socket_info: look once */
                if (cur == prev) {
                        continue;
                }
                prev = cur;

                if (cur->myname == NULL ||
                    cur->myname->sa_family != sa->sa_family) {
                        continue;
                }

                if (cur->myname->sa_family == AF_INET) {
                        const struct sockaddr_in *mine =
                                (const struct sockaddr_in *)cur->myname;
                        const struct sockaddr_in *wanted =
                                (const struct sockaddr_in *)sa;

                        if (mine->sin_addr.s_addr != htonl(INADDR_ANY) &&
                            mine->sin_port == wanted->sin_port &&
                            mine->sin_addr.s_addr == wanted->sin_addr.s_addr) {
                                /* found */
                                return true;
                        }
                }
#ifdef HAVE_IPV6
                else if (cur->myname->sa_family == AF_INET6) {
                        const struct sockaddr_in6 *mine =
                                (const struct sockaddr_in6 *)cur->myname;
                        const struct sockaddr_in6 *wanted =
                                (const struct sockaddr_in6 *)sa;

                        if (mine->sin6_port == wanted->sin6_port &&
                            IN6_ARE_ADDR_EQUAL(&mine->sin6_addr,
                                               &wanted->sin6_addr)) {
                                /* found */
                                return true;
                        }
                }
#endif
        }

        return false;
}
#endif
2424
2425 static void swrap_remove_stale(int fd);
2426
2427 static int sockaddr_convert_to_un(struct socket_info *si,
2428                                   const struct sockaddr *in_addr,
2429                                   socklen_t in_len,
2430                                   struct sockaddr_un *out_addr,
2431                                   int alloc_sock,
2432                                   int *bcast)
2433 {
2434         struct sockaddr *out = (struct sockaddr *)(void *)out_addr;
2435
2436         (void) in_len; /* unused */
2437
2438         if (out_addr == NULL) {
2439                 return 0;
2440         }
2441
2442         out->sa_family = AF_UNIX;
2443 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2444         out->sa_len = sizeof(*out_addr);
2445 #endif
2446
2447         switch (in_addr->sa_family) {
2448         case AF_UNSPEC: {
2449                 const struct sockaddr_in *sin;
2450                 if (si->family != AF_INET) {
2451                         break;
2452                 }
2453                 if (in_len < sizeof(struct sockaddr_in)) {
2454                         break;
2455                 }
2456                 sin = (const struct sockaddr_in *)(const void *)in_addr;
2457                 if(sin->sin_addr.s_addr != htonl(INADDR_ANY)) {
2458                         break;
2459                 }
2460
2461                 /*
2462                  * Note: in the special case of AF_UNSPEC and INADDR_ANY,
2463                  * AF_UNSPEC is mapped to AF_INET and must be treated here.
2464                  */
2465
2466                 FALL_THROUGH;
2467         }
2468         case AF_INET:
2469 #ifdef HAVE_IPV6
2470         case AF_INET6:
2471 #endif
2472                 switch (si->type) {
2473                 case SOCK_STREAM:
2474                 case SOCK_DGRAM:
2475                         break;
2476                 default:
2477                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2478                         errno = ESOCKTNOSUPPORT;
2479                         return -1;
2480                 }
2481                 if (alloc_sock) {
2482                         return convert_in_un_alloc(si, in_addr, out_addr, bcast);
2483                 } else {
2484                         return convert_in_un_remote(si, in_addr, out_addr, bcast);
2485                 }
2486         default:
2487                 break;
2488         }
2489
2490         errno = EAFNOSUPPORT;
2491         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2492         return -1;
2493 }
2494
2495 static int sockaddr_convert_from_un(const struct socket_info *si,
2496                                     const struct sockaddr_un *in_addr,
2497                                     socklen_t un_addrlen,
2498                                     int family,
2499                                     struct sockaddr *out_addr,
2500                                     socklen_t *out_addrlen)
2501 {
2502         int ret;
2503
2504         if (out_addr == NULL || out_addrlen == NULL)
2505                 return 0;
2506
2507         if (un_addrlen == 0) {
2508                 *out_addrlen = 0;
2509                 return 0;
2510         }
2511
2512         switch (family) {
2513         case AF_INET:
2514 #ifdef HAVE_IPV6
2515         case AF_INET6:
2516 #endif
2517                 switch (si->type) {
2518                 case SOCK_STREAM:
2519                 case SOCK_DGRAM:
2520                         break;
2521                 default:
2522                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2523                         errno = ESOCKTNOSUPPORT;
2524                         return -1;
2525                 }
2526                 ret = convert_un_in(in_addr, out_addr, out_addrlen);
2527 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2528                 out_addr->sa_len = *out_addrlen;
2529 #endif
2530                 return ret;
2531         default:
2532                 break;
2533         }
2534
2535         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2536         errno = EAFNOSUPPORT;
2537         return -1;
2538 }
2539
/*
 * Kinds of socket events, one enumerator per event. Values are assigned
 * consecutively starting at 0.
 */
enum swrap_packet_type {
        /* connect */
        SWRAP_CONNECT_SEND = 0,
        SWRAP_CONNECT_UNREACH,
        SWRAP_CONNECT_RECV,
        SWRAP_CONNECT_ACK,

        /* accept */
        SWRAP_ACCEPT_SEND,
        SWRAP_ACCEPT_RECV,
        SWRAP_ACCEPT_ACK,

        /* recvfrom / sendto */
        SWRAP_RECVFROM,
        SWRAP_SENDTO,
        SWRAP_SENDTO_UNREACH,

        /* recv / send */
        SWRAP_PENDING_RST,
        SWRAP_RECV,
        SWRAP_RECV_RST,
        SWRAP_SEND,
        SWRAP_SEND_RST,

        /* close */
        SWRAP_CLOSE_SEND,
        SWRAP_CLOSE_RECV,
        SWRAP_CLOSE_ACK,
};
2560
/*
 * Header placed at the start of a capture file.
 * NOTE(review): the field set looks like the classic pcap global header;
 * confirm against the pcap file format before relying on it.
 *
 * The struct is expected to occupy exactly SWRAP_FILE_HDR_SIZE bytes, i.e.
 * to contain no padding.
 */
struct swrap_file_hdr {
        uint32_t        magic;
        uint16_t        version_major;
        uint16_t        version_minor;
        int32_t         timezone;
        uint32_t        sigfigs;
        uint32_t        frame_max_len;
        uint32_t        link_type;
};

/* Upper bound that goes with the frame_max_len field. */
#define SWRAP_FRAME_LENGTH_MAX 0xFFFF

/* Expected sizeof(struct swrap_file_hdr). */
#define SWRAP_FILE_HDR_SIZE 24
2572
/*
 * Per-packet record header: a timestamp split into seconds and
 * microseconds, followed by the recorded and the full packet length.
 *
 * The struct is expected to occupy exactly SWRAP_PACKET_FRAME_SIZE bytes.
 */
struct swrap_packet_frame {
        uint32_t        seconds;
        uint32_t        micro_seconds;
        uint32_t        recorded_length;
        uint32_t        full_length;
};

/* Expected sizeof(struct swrap_packet_frame). */
#define SWRAP_PACKET_FRAME_SIZE 16
2580
/*
 * IP header of a recorded packet, either in its IPv4 (v4) or in its IPv6
 * (v6) form. Both members are expected to be free of padding; the expected
 * sizes are given by the macros that follow the union.
 */
union swrap_packet_ip {
        struct {
                uint8_t         ver_hdrlen;
                uint8_t         tos;
                uint16_t        packet_length;
                uint16_t        identification;
                uint8_t         flags;
                uint8_t         fragment;
                uint8_t         ttl;
                uint8_t         protocol;
                uint16_t        hdr_checksum;
                uint32_t        src_addr;
                uint32_t        dest_addr;
        } v4;
        struct {
                uint8_t         ver_prio;
                uint8_t         flow_label_high;
                uint16_t        flow_label_low;
                uint16_t        payload_length;
                uint8_t         next_header;
                uint8_t         hop_limit;
                uint8_t         src_addr[16];
                uint8_t         dest_addr[16];
        } v6;
};

/* Expected sizes of the v4 member, the v6 member and the whole union. */
#define SWRAP_PACKET_IP_V4_SIZE 20
#define SWRAP_PACKET_IP_V6_SIZE 40
#define SWRAP_PACKET_IP_SIZE 40
2609
/*
 * Transport-level header that follows the IP header of a recorded packet:
 * TCP, UDP, ICMPv4 or ICMPv6. All members are expected to be free of
 * padding; the expected sizes are given by the macros that follow the union.
 */
union swrap_packet_payload {
        struct {
                uint16_t        source_port;
                uint16_t        dest_port;
                uint32_t        seq_num;
                uint32_t        ack_num;
                uint8_t         hdr_length;
                uint8_t         control;
                uint16_t        window;
                uint16_t        checksum;
                uint16_t        urg;
        } tcp;
        struct {
                uint16_t        source_port;
                uint16_t        dest_port;
                uint16_t        length;
                uint16_t        checksum;
        } udp;
        struct {
                uint8_t         type;
                uint8_t         code;
                uint16_t        checksum;
                uint32_t        unused;
        } icmp4;
        struct {
                uint8_t         type;
                uint8_t         code;
                uint16_t        checksum;
                uint32_t        unused;
        } icmp6;
};

/* Expected sizes of the individual members and of the whole union. */
#define SWRAP_PACKET_PAYLOAD_TCP_SIZE 20
#define SWRAP_PACKET_PAYLOAD_UDP_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP4_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP6_SIZE 8
#define SWRAP_PACKET_PAYLOAD_SIZE 20
2646
/*
 * Smallest packet buffer to allocate: room for one frame header, the
 * larger IP header variant and the larger payload header variant.
 */
#define SWRAP_PACKET_MIN_ALLOC ( \
        SWRAP_PACKET_FRAME_SIZE \
        + SWRAP_PACKET_IP_SIZE \
        + SWRAP_PACKET_PAYLOAD_SIZE \
)
2651
2652 static const char *swrap_pcap_init_file(void)
2653 {
2654         static int initialized = 0;
2655         static const char *s = NULL;
2656         static const struct swrap_file_hdr h;
2657         static const struct swrap_packet_frame f;
2658         static const union swrap_packet_ip i;
2659         static const union swrap_packet_payload p;
2660
2661         if (initialized == 1) {
2662                 return s;
2663         }
2664         initialized = 1;
2665
2666         /*
2667          * TODO: don't use the structs use plain buffer offsets
2668          *       and PUSH_U8(), PUSH_U16() and PUSH_U32()
2669          *
2670          * for now make sure we disable PCAP support
2671          * if the struct has alignment!
2672          */
2673         if (sizeof(h) != SWRAP_FILE_HDR_SIZE) {
2674                 return NULL;
2675         }
2676         if (sizeof(f) != SWRAP_PACKET_FRAME_SIZE) {
2677                 return NULL;
2678         }
2679         if (sizeof(i) != SWRAP_PACKET_IP_SIZE) {
2680                 return NULL;
2681         }
2682         if (sizeof(i.v4) != SWRAP_PACKET_IP_V4_SIZE) {
2683                 return NULL;
2684         }
2685         if (sizeof(i.v6) != SWRAP_PACKET_IP_V6_SIZE) {
2686                 return NULL;
2687         }
2688         if (sizeof(p) != SWRAP_PACKET_PAYLOAD_SIZE) {
2689                 return NULL;
2690         }
2691         if (sizeof(p.tcp) != SWRAP_PACKET_PAYLOAD_TCP_SIZE) {
2692                 return NULL;
2693         }
2694         if (sizeof(p.udp) != SWRAP_PACKET_PAYLOAD_UDP_SIZE) {
2695                 return NULL;
2696         }
2697         if (sizeof(p.icmp4) != SWRAP_PACKET_PAYLOAD_ICMP4_SIZE) {
2698                 return NULL;
2699         }
2700         if (sizeof(p.icmp6) != SWRAP_PACKET_PAYLOAD_ICMP6_SIZE) {
2701                 return NULL;
2702         }
2703
2704         s = getenv("SOCKET_WRAPPER_PCAP_FILE");
2705         if (s == NULL) {
2706                 return NULL;
2707         }
2708         if (strncmp(s, "./", 2) == 0) {
2709                 s += 2;
2710         }
2711         SWRAP_LOG(SWRAP_LOG_TRACE, "SOCKET_WRAPPER_PCAP_FILE: %s", s);
2712         return s;
2713 }
2714
2715 static uint8_t *swrap_pcap_packet_init(struct timeval *tval,
2716                                        const struct sockaddr *src,
2717                                        const struct sockaddr *dest,
2718                                        int socket_type,
2719                                        const uint8_t *payload,
2720                                        size_t payload_len,
2721                                        unsigned long tcp_seqno,
2722                                        unsigned long tcp_ack,
2723                                        unsigned char tcp_ctl,
2724                                        int unreachable,
2725                                        size_t *_packet_len)
2726 {
2727         uint8_t *base = NULL;
2728         uint8_t *buf = NULL;
2729         union {
2730                 uint8_t *ptr;
2731                 struct swrap_packet_frame *frame;
2732         } f;
2733         union {
2734                 uint8_t *ptr;
2735                 union swrap_packet_ip *ip;
2736         } i;
2737         union swrap_packet_payload *pay;
2738         size_t packet_len;
2739         size_t alloc_len;
2740         size_t nonwire_len = sizeof(struct swrap_packet_frame);
2741         size_t wire_hdr_len = 0;
2742         size_t wire_len = 0;
2743         size_t ip_hdr_len = 0;
2744         size_t icmp_hdr_len = 0;
2745         size_t icmp_truncate_len = 0;
2746         uint8_t protocol = 0, icmp_protocol = 0;
2747         const struct sockaddr_in *src_in = NULL;
2748         const struct sockaddr_in *dest_in = NULL;
2749 #ifdef HAVE_IPV6
2750         const struct sockaddr_in6 *src_in6 = NULL;
2751         const struct sockaddr_in6 *dest_in6 = NULL;
2752 #endif
2753         uint16_t src_port;
2754         uint16_t dest_port;
2755
2756         switch (src->sa_family) {
2757         case AF_INET:
2758                 src_in = (const struct sockaddr_in *)(const void *)src;
2759                 dest_in = (const struct sockaddr_in *)(const void *)dest;
2760                 src_port = src_in->sin_port;
2761                 dest_port = dest_in->sin_port;
2762                 ip_hdr_len = sizeof(i.ip->v4);
2763                 break;
2764 #ifdef HAVE_IPV6
2765         case AF_INET6:
2766                 src_in6 = (const struct sockaddr_in6 *)(const void *)src;
2767                 dest_in6 = (const struct sockaddr_in6 *)(const void *)dest;
2768                 src_port = src_in6->sin6_port;
2769                 dest_port = dest_in6->sin6_port;
2770                 ip_hdr_len = sizeof(i.ip->v6);
2771                 break;
2772 #endif
2773         default:
2774                 return NULL;
2775         }
2776
2777         switch (socket_type) {
2778         case SOCK_STREAM:
2779                 protocol = 0x06; /* TCP */
2780                 wire_hdr_len = ip_hdr_len + sizeof(pay->tcp);
2781                 wire_len = wire_hdr_len + payload_len;
2782                 break;
2783
2784         case SOCK_DGRAM:
2785                 protocol = 0x11; /* UDP */
2786                 wire_hdr_len = ip_hdr_len + sizeof(pay->udp);
2787                 wire_len = wire_hdr_len + payload_len;
2788                 break;
2789
2790         default:
2791                 return NULL;
2792         }
2793
2794         if (unreachable) {
2795                 icmp_protocol = protocol;
2796                 switch (src->sa_family) {
2797                 case AF_INET:
2798                         protocol = 0x01; /* ICMPv4 */
2799                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp4);
2800                         break;
2801 #ifdef HAVE_IPV6
2802                 case AF_INET6:
2803                         protocol = 0x3A; /* ICMPv6 */
2804                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp6);
2805                         break;
2806 #endif
2807                 }
2808                 if (wire_len > 64 ) {
2809                         icmp_truncate_len = wire_len - 64;
2810                 }
2811                 wire_len += icmp_hdr_len;
2812         }
2813
2814         packet_len = nonwire_len + wire_len;
2815         alloc_len = packet_len;
2816         if (alloc_len < SWRAP_PACKET_MIN_ALLOC) {
2817                 alloc_len = SWRAP_PACKET_MIN_ALLOC;
2818         }
2819
2820         base = (uint8_t *)calloc(1, alloc_len);
2821         if (base == NULL) {
2822                 return NULL;
2823         }
2824
2825         buf = base;
2826         f.ptr = buf;
2827
2828         f.frame->seconds                = tval->tv_sec;
2829         f.frame->micro_seconds  = tval->tv_usec;
2830         f.frame->recorded_length        = wire_len - icmp_truncate_len;
2831         f.frame->full_length    = wire_len - icmp_truncate_len;
2832
2833         buf += SWRAP_PACKET_FRAME_SIZE;
2834
2835         i.ptr = buf;
2836         switch (src->sa_family) {
2837         case AF_INET:
2838                 if (src_in == NULL || dest_in == NULL) {
2839                         SAFE_FREE(base);
2840                         return NULL;
2841                 }
2842
2843                 i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2844                 i.ip->v4.tos            = 0x00;
2845                 i.ip->v4.packet_length  = htons(wire_len - icmp_truncate_len);
2846                 i.ip->v4.identification = htons(0xFFFF);
2847                 i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2848                 i.ip->v4.fragment       = htons(0x0000);
2849                 i.ip->v4.ttl            = 0xFF;
2850                 i.ip->v4.protocol       = protocol;
2851                 i.ip->v4.hdr_checksum   = htons(0x0000);
2852                 i.ip->v4.src_addr       = src_in->sin_addr.s_addr;
2853                 i.ip->v4.dest_addr      = dest_in->sin_addr.s_addr;
2854                 buf += SWRAP_PACKET_IP_V4_SIZE;
2855                 break;
2856 #ifdef HAVE_IPV6
2857         case AF_INET6:
2858                 if (src_in6 == NULL || dest_in6 == NULL) {
2859                         SAFE_FREE(base);
2860                         return NULL;
2861                 }
2862
2863                 i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2864                 i.ip->v6.flow_label_high        = 0x00;
2865                 i.ip->v6.flow_label_low = 0x0000;
2866                 i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2867                 i.ip->v6.next_header    = protocol;
2868                 memcpy(i.ip->v6.src_addr, src_in6->sin6_addr.s6_addr, 16);
2869                 memcpy(i.ip->v6.dest_addr, dest_in6->sin6_addr.s6_addr, 16);
2870                 buf += SWRAP_PACKET_IP_V6_SIZE;
2871                 break;
2872 #endif
2873         }
2874
2875         if (unreachable) {
2876                 pay = (union swrap_packet_payload *)(void *)buf;
2877                 switch (src->sa_family) {
2878                 case AF_INET:
2879                         pay->icmp4.type         = 0x03; /* destination unreachable */
2880                         pay->icmp4.code         = 0x01; /* host unreachable */
2881                         pay->icmp4.checksum     = htons(0x0000);
2882                         pay->icmp4.unused       = htonl(0x00000000);
2883
2884                         buf += SWRAP_PACKET_PAYLOAD_ICMP4_SIZE;
2885
2886                         /* set the ip header in the ICMP payload */
2887                         i.ptr = buf;
2888                         i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2889                         i.ip->v4.tos            = 0x00;
2890                         i.ip->v4.packet_length  = htons(wire_len - icmp_hdr_len);
2891                         i.ip->v4.identification = htons(0xFFFF);
2892                         i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2893                         i.ip->v4.fragment       = htons(0x0000);
2894                         i.ip->v4.ttl            = 0xFF;
2895                         i.ip->v4.protocol       = icmp_protocol;
2896                         i.ip->v4.hdr_checksum   = htons(0x0000);
2897                         i.ip->v4.src_addr       = dest_in->sin_addr.s_addr;
2898                         i.ip->v4.dest_addr      = src_in->sin_addr.s_addr;
2899
2900                         buf += SWRAP_PACKET_IP_V4_SIZE;
2901
2902                         src_port = dest_in->sin_port;
2903                         dest_port = src_in->sin_port;
2904                         break;
2905 #ifdef HAVE_IPV6
2906                 case AF_INET6:
2907                         pay->icmp6.type         = 0x01; /* destination unreachable */
2908                         pay->icmp6.code         = 0x03; /* address unreachable */
2909                         pay->icmp6.checksum     = htons(0x0000);
2910                         pay->icmp6.unused       = htonl(0x00000000);
2911                         buf += SWRAP_PACKET_PAYLOAD_ICMP6_SIZE;
2912
2913                         /* set the ip header in the ICMP payload */
2914                         i.ptr = buf;
2915                         i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2916                         i.ip->v6.flow_label_high        = 0x00;
2917                         i.ip->v6.flow_label_low = 0x0000;
2918                         i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2919                         i.ip->v6.next_header    = protocol;
2920                         memcpy(i.ip->v6.src_addr, dest_in6->sin6_addr.s6_addr, 16);
2921                         memcpy(i.ip->v6.dest_addr, src_in6->sin6_addr.s6_addr, 16);
2922
2923                         buf += SWRAP_PACKET_IP_V6_SIZE;
2924
2925                         src_port = dest_in6->sin6_port;
2926                         dest_port = src_in6->sin6_port;
2927                         break;
2928 #endif
2929                 }
2930         }
2931
2932         pay = (union swrap_packet_payload *)(void *)buf;
2933
2934         switch (socket_type) {
2935         case SOCK_STREAM:
2936                 pay->tcp.source_port    = src_port;
2937                 pay->tcp.dest_port      = dest_port;
2938                 pay->tcp.seq_num        = htonl(tcp_seqno);
2939                 pay->tcp.ack_num        = htonl(tcp_ack);
2940                 pay->tcp.hdr_length     = 0x50; /* 5 * 32 bit words */
2941                 pay->tcp.control        = tcp_ctl;
2942                 pay->tcp.window         = htons(0x7FFF);
2943                 pay->tcp.checksum       = htons(0x0000);
2944                 pay->tcp.urg            = htons(0x0000);
2945                 buf += SWRAP_PACKET_PAYLOAD_TCP_SIZE;
2946
2947                 break;
2948
2949         case SOCK_DGRAM:
2950                 pay->udp.source_port    = src_port;
2951                 pay->udp.dest_port      = dest_port;
2952                 pay->udp.length         = htons(8 + payload_len);
2953                 pay->udp.checksum       = htons(0x0000);
2954                 buf += SWRAP_PACKET_PAYLOAD_UDP_SIZE;
2955
2956                 break;
2957         }
2958
2959         if (payload && payload_len > 0) {
2960                 memcpy(buf, payload, payload_len);
2961         }
2962
2963         *_packet_len = packet_len - icmp_truncate_len;
2964         return base;
2965 }
2966
2967 static int swrap_pcap_get_fd(const char *fname)
2968 {
2969         static int fd = -1;
2970
2971         if (fd != -1) {
2972                 return fd;
2973         }
2974
2975         fd = libc_open(fname, O_WRONLY|O_CREAT|O_EXCL|O_APPEND, 0644);
2976         if (fd != -1) {
2977                 struct swrap_file_hdr file_hdr;
2978                 file_hdr.magic          = 0xA1B2C3D4;
2979                 file_hdr.version_major  = 0x0002;
2980                 file_hdr.version_minor  = 0x0004;
2981                 file_hdr.timezone       = 0x00000000;
2982                 file_hdr.sigfigs        = 0x00000000;
2983                 file_hdr.frame_max_len  = SWRAP_FRAME_LENGTH_MAX;
2984                 file_hdr.link_type      = 0x0065; /* 101 RAW IP */
2985
2986                 if (libc_write(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
2987                         libc_close(fd);
2988                         fd = -1;
2989                 }
2990                 return fd;
2991         }
2992
2993         fd = libc_open(fname, O_WRONLY|O_APPEND, 0644);
2994
2995         return fd;
2996 }
2997
2998 static uint8_t *swrap_pcap_marshall_packet(struct socket_info *si,
2999                                            const struct sockaddr *addr,
3000                                            enum swrap_packet_type type,
3001                                            const void *buf, size_t len,
3002                                            size_t *packet_len)
3003 {
3004         const struct sockaddr *src_addr;
3005         const struct sockaddr *dest_addr;
3006         unsigned long tcp_seqno = 0;
3007         unsigned long tcp_ack = 0;
3008         unsigned char tcp_ctl = 0;
3009         int unreachable = 0;
3010
3011         struct timeval tv;
3012
3013         switch (si->family) {
3014         case AF_INET:
3015                 break;
3016 #ifdef HAVE_IPV6
3017         case AF_INET6:
3018                 break;
3019 #endif
3020         default:
3021                 return NULL;
3022         }
3023
3024         switch (type) {
3025         case SWRAP_CONNECT_SEND:
3026                 if (si->type != SOCK_STREAM) {
3027                         return NULL;
3028                 }
3029
3030                 src_addr  = &si->myname.sa.s;
3031                 dest_addr = addr;
3032
3033                 tcp_seqno = si->io.pck_snd;
3034                 tcp_ack = si->io.pck_rcv;
3035                 tcp_ctl = 0x02; /* SYN */
3036
3037                 si->io.pck_snd += 1;
3038
3039                 break;
3040
3041         case SWRAP_CONNECT_RECV:
3042                 if (si->type != SOCK_STREAM) {
3043                         return NULL;
3044                 }
3045
3046                 dest_addr = &si->myname.sa.s;
3047                 src_addr = addr;
3048
3049                 tcp_seqno = si->io.pck_rcv;
3050                 tcp_ack = si->io.pck_snd;
3051                 tcp_ctl = 0x12; /** SYN,ACK */
3052
3053                 si->io.pck_rcv += 1;
3054
3055                 break;
3056
3057         case SWRAP_CONNECT_UNREACH:
3058                 if (si->type != SOCK_STREAM) {
3059                         return NULL;
3060                 }
3061
3062                 dest_addr = &si->myname.sa.s;
3063                 src_addr  = addr;
3064
3065                 /* Unreachable: resend the data of SWRAP_CONNECT_SEND */
3066                 tcp_seqno = si->io.pck_snd - 1;
3067                 tcp_ack = si->io.pck_rcv;
3068                 tcp_ctl = 0x02; /* SYN */
3069                 unreachable = 1;
3070
3071                 break;
3072
3073         case SWRAP_CONNECT_ACK:
3074                 if (si->type != SOCK_STREAM) {
3075                         return NULL;
3076                 }
3077
3078                 src_addr  = &si->myname.sa.s;
3079                 dest_addr = addr;
3080
3081                 tcp_seqno = si->io.pck_snd;
3082                 tcp_ack = si->io.pck_rcv;
3083                 tcp_ctl = 0x10; /* ACK */
3084
3085                 break;
3086
3087         case SWRAP_ACCEPT_SEND:
3088                 if (si->type != SOCK_STREAM) {
3089                         return NULL;
3090                 }
3091
3092                 dest_addr = &si->myname.sa.s;
3093                 src_addr = addr;
3094
3095                 tcp_seqno = si->io.pck_rcv;
3096                 tcp_ack = si->io.pck_snd;
3097                 tcp_ctl = 0x02; /* SYN */
3098
3099                 si->io.pck_rcv += 1;
3100
3101                 break;
3102
3103         case SWRAP_ACCEPT_RECV:
3104                 if (si->type != SOCK_STREAM) {
3105                         return NULL;
3106                 }
3107
3108                 src_addr = &si->myname.sa.s;
3109                 dest_addr = addr;
3110
3111                 tcp_seqno = si->io.pck_snd;
3112                 tcp_ack = si->io.pck_rcv;
3113                 tcp_ctl = 0x12; /* SYN,ACK */
3114
3115                 si->io.pck_snd += 1;
3116
3117                 break;
3118
3119         case SWRAP_ACCEPT_ACK:
3120                 if (si->type != SOCK_STREAM) {
3121                         return NULL;
3122                 }
3123
3124                 dest_addr = &si->myname.sa.s;
3125                 src_addr = addr;
3126
3127                 tcp_seqno = si->io.pck_rcv;
3128                 tcp_ack = si->io.pck_snd;
3129                 tcp_ctl = 0x10; /* ACK */
3130
3131                 break;
3132
3133         case SWRAP_SEND:
3134                 src_addr  = &si->myname.sa.s;
3135                 dest_addr = &si->peername.sa.s;
3136
3137                 tcp_seqno = si->io.pck_snd;
3138                 tcp_ack = si->io.pck_rcv;
3139                 tcp_ctl = 0x18; /* PSH,ACK */
3140
3141                 si->io.pck_snd += len;
3142
3143                 break;
3144
3145         case SWRAP_SEND_RST:
3146                 dest_addr = &si->myname.sa.s;
3147                 src_addr  = &si->peername.sa.s;
3148
3149                 if (si->type == SOCK_DGRAM) {
3150                         return swrap_pcap_marshall_packet(si,
3151                                                           &si->peername.sa.s,
3152                                                           SWRAP_SENDTO_UNREACH,
3153                                                           buf,
3154                                                           len,
3155                                                           packet_len);
3156                 }
3157
3158                 tcp_seqno = si->io.pck_rcv;
3159                 tcp_ack = si->io.pck_snd;
3160                 tcp_ctl = 0x14; /** RST,ACK */
3161
3162                 break;
3163
3164         case SWRAP_PENDING_RST:
3165                 dest_addr = &si->myname.sa.s;
3166                 src_addr  = &si->peername.sa.s;
3167
3168                 if (si->type == SOCK_DGRAM) {
3169                         return NULL;
3170                 }
3171
3172                 tcp_seqno = si->io.pck_rcv;
3173                 tcp_ack = si->io.pck_snd;
3174                 tcp_ctl = 0x14; /* RST,ACK */
3175
3176                 break;
3177
3178         case SWRAP_RECV:
3179                 dest_addr = &si->myname.sa.s;
3180                 src_addr  = &si->peername.sa.s;
3181
3182                 tcp_seqno = si->io.pck_rcv;
3183                 tcp_ack = si->io.pck_snd;
3184                 tcp_ctl = 0x18; /* PSH,ACK */
3185
3186                 si->io.pck_rcv += len;
3187
3188                 break;
3189
3190         case SWRAP_RECV_RST:
3191                 dest_addr = &si->myname.sa.s;
3192                 src_addr  = &si->peername.sa.s;
3193
3194                 if (si->type == SOCK_DGRAM) {
3195                         return NULL;
3196                 }
3197
3198                 tcp_seqno = si->io.pck_rcv;
3199                 tcp_ack = si->io.pck_snd;
3200                 tcp_ctl = 0x14; /* RST,ACK */
3201
3202                 break;
3203
3204         case SWRAP_SENDTO:
3205                 src_addr = &si->myname.sa.s;
3206                 dest_addr = addr;
3207
3208                 si->io.pck_snd += len;
3209
3210                 break;
3211
3212         case SWRAP_SENDTO_UNREACH:
3213                 dest_addr = &si->myname.sa.s;
3214                 src_addr = addr;
3215
3216                 unreachable = 1;
3217
3218                 break;
3219
3220         case SWRAP_RECVFROM:
3221                 dest_addr = &si->myname.sa.s;
3222                 src_addr = addr;
3223
3224                 si->io.pck_rcv += len;
3225
3226                 break;
3227
3228         case SWRAP_CLOSE_SEND:
3229                 if (si->type != SOCK_STREAM) {
3230                         return NULL;
3231                 }
3232
3233                 src_addr  = &si->myname.sa.s;
3234                 dest_addr = &si->peername.sa.s;
3235
3236                 tcp_seqno = si->io.pck_snd;
3237                 tcp_ack = si->io.pck_rcv;
3238                 tcp_ctl = 0x11; /* FIN, ACK */
3239
3240                 si->io.pck_snd += 1;
3241
3242                 break;
3243
3244         case SWRAP_CLOSE_RECV:
3245                 if (si->type != SOCK_STREAM) {
3246                         return NULL;
3247                 }
3248
3249                 dest_addr = &si->myname.sa.s;
3250                 src_addr  = &si->peername.sa.s;
3251
3252                 tcp_seqno = si->io.pck_rcv;
3253                 tcp_ack = si->io.pck_snd;
3254                 tcp_ctl = 0x11; /* FIN,ACK */
3255
3256                 si->io.pck_rcv += 1;
3257
3258                 break;
3259
3260         case SWRAP_CLOSE_ACK:
3261                 if (si->type != SOCK_STREAM) {
3262                         return NULL;
3263                 }
3264
3265                 src_addr  = &si->myname.sa.s;
3266                 dest_addr = &si->peername.sa.s;
3267
3268                 tcp_seqno = si->io.pck_snd;
3269                 tcp_ack = si->io.pck_rcv;
3270                 tcp_ctl = 0x10; /* ACK */
3271
3272                 break;
3273         default:
3274                 return NULL;
3275         }
3276
3277         swrapGetTimeOfDay(&tv);
3278
3279         return swrap_pcap_packet_init(&tv,
3280                                       src_addr,
3281                                       dest_addr,
3282                                       si->type,
3283                                       (const uint8_t *)buf,
3284                                       len,
3285                                       tcp_seqno,
3286                                       tcp_ack,
3287                                       tcp_ctl,
3288                                       unreachable,
3289                                       packet_len);
3290 }
3291
3292 static void swrap_pcap_dump_packet(struct socket_info *si,
3293                                    const struct sockaddr *addr,
3294                                    enum swrap_packet_type type,
3295                                    const void *buf, size_t len)
3296 {
3297         const char *file_name;
3298         uint8_t *packet;
3299         size_t packet_len = 0;
3300         int fd;
3301
3302         swrap_mutex_lock(&pcap_dump_mutex);
3303
3304         file_name = swrap_pcap_init_file();
3305         if (!file_name) {
3306                 goto done;
3307         }
3308
3309         packet = swrap_pcap_marshall_packet(si,
3310                                             addr,
3311                                             type,
3312                                             buf,
3313                                             len,
3314                                             &packet_len);
3315         if (packet == NULL) {
3316                 goto done;
3317         }
3318
3319         fd = swrap_pcap_get_fd(file_name);
3320         if (fd != -1) {
3321                 if (libc_write(fd, packet, packet_len) != (ssize_t)packet_len) {
3322                         free(packet);
3323                         goto done;
3324                 }
3325         }
3326
3327         free(packet);
3328
3329 done:
3330         swrap_mutex_unlock(&pcap_dump_mutex);
3331 }
3332
3333 /****************************************************************************
3334  *   SIGNALFD
3335  ***************************************************************************/
3336
3337 #ifdef HAVE_SIGNALFD
3338 static int swrap_signalfd(int fd, const sigset_t *mask, int flags)
3339 {
3340         int rc;
3341
3342         rc = libc_signalfd(fd, mask, flags);
3343         if (rc != -1) {
3344                 swrap_remove_stale(fd);
3345         }
3346
3347         return rc;
3348 }
3349
3350 int signalfd(int fd, const sigset_t *mask, int flags)
3351 {
3352         return swrap_signalfd(fd, mask, flags);
3353 }
3354 #endif
3355
3356 /****************************************************************************
3357  *   SOCKET
3358  ***************************************************************************/
3359
3360 static int swrap_socket(int family, int type, int protocol)
3361 {
3362         struct socket_info *si = NULL;
3363         struct socket_info _si = { 0 };
3364         int fd;
3365         int ret;
3366         int real_type = type;
3367
3368         /*
3369          * Remove possible addition flags passed to socket() so
3370          * do not fail checking the type.
3371          * See https://lwn.net/Articles/281965/
3372          */
3373 #ifdef SOCK_CLOEXEC
3374         real_type &= ~SOCK_CLOEXEC;
3375 #endif
3376 #ifdef SOCK_NONBLOCK
3377         real_type &= ~SOCK_NONBLOCK;
3378 #endif
3379
3380         if (!socket_wrapper_enabled()) {
3381                 return libc_socket(family, type, protocol);
3382         }
3383
3384         switch (family) {
3385         case AF_INET:
3386 #ifdef HAVE_IPV6
3387         case AF_INET6:
3388 #endif
3389                 break;
3390 #ifdef AF_NETLINK
3391         case AF_NETLINK:
3392 #endif /* AF_NETLINK */
3393 #ifdef AF_PACKET
3394         case AF_PACKET:
3395 #endif /* AF_PACKET */
3396         case AF_UNIX:
3397                 fd = libc_socket(family, type, protocol);
3398                 if (fd != -1) {
3399                         /* Check if we have a stale fd and remove it */
3400                         swrap_remove_stale(fd);
3401                         SWRAP_LOG(SWRAP_LOG_TRACE,
3402                                   "Unix socket fd=%d",
3403                                   fd);
3404                 }
3405                 return fd;
3406         default:
3407                 errno = EAFNOSUPPORT;
3408                 return -1;
3409         }
3410
3411         switch (real_type) {
3412         case SOCK_STREAM:
3413                 break;
3414         case SOCK_DGRAM:
3415                 break;
3416         default:
3417                 errno = EPROTONOSUPPORT;
3418                 return -1;
3419         }
3420
3421         switch (protocol) {
3422         case 0:
3423                 break;
3424         case 6:
3425                 if (real_type == SOCK_STREAM) {
3426                         break;
3427                 }
3428                 FALL_THROUGH;
3429         case 17:
3430                 if (real_type == SOCK_DGRAM) {
3431                         break;
3432                 }
3433                 FALL_THROUGH;
3434         default:
3435                 errno = EPROTONOSUPPORT;
3436                 return -1;
3437         }
3438
3439         /*
3440          * We must call libc_socket with type, from the caller, not the version
3441          * we removed SOCK_CLOEXEC and SOCK_NONBLOCK from
3442          */
3443         fd = libc_socket(AF_UNIX, type, 0);
3444
3445         if (fd == -1) {
3446                 return -1;
3447         }
3448
3449         /* Check if we have a stale fd and remove it */
3450         swrap_remove_stale(fd);
3451
3452         si = &_si;
3453         si->family = family;
3454
3455         /* however, the rest of the socket_wrapper code expects just
3456          * the type, not the flags */
3457         si->type = real_type;
3458         si->protocol = protocol;
3459
3460         /*
3461          * Setup myname so getsockname() can succeed to find out the socket
3462          * type.
3463          */
3464         switch(si->family) {
3465         case AF_INET: {
3466                 struct sockaddr_in sin = {
3467                         .sin_family = AF_INET,
3468                 };
3469
3470                 si->myname.sa_socklen = sizeof(struct sockaddr_in);
3471                 memcpy(&si->myname.sa.in, &sin, si->myname.sa_socklen);
3472                 break;
3473         }
3474 #ifdef HAVE_IPV6
3475         case AF_INET6: {
3476                 struct sockaddr_in6 sin6 = {
3477                         .sin6_family = AF_INET6,
3478                 };
3479
3480                 si->myname.sa_socklen = sizeof(struct sockaddr_in6);
3481                 memcpy(&si->myname.sa.in6, &sin6, si->myname.sa_socklen);
3482                 break;
3483         }
3484 #endif
3485         default:
3486                 errno = EINVAL;
3487                 return -1;
3488         }
3489
3490         ret = swrap_create_socket(si, fd);
3491         if (ret == -1) {
3492                 int saved_errno = errno;
3493                 libc_close(fd);
3494                 errno = saved_errno;
3495                 return -1;
3496         }
3497
3498         SWRAP_LOG(SWRAP_LOG_TRACE,
3499                   "Created %s socket for protocol %s, fd=%d",
3500                   family == AF_INET ? "IPv4" : "IPv6",
3501                   real_type == SOCK_DGRAM ? "UDP" : "TCP",
3502                   fd);
3503
3504         return fd;
3505 }
3506
/*
 * Exported socket symbol; forwards all arguments to swrap_socket() and
 * returns its result unchanged.
 */
int socket(int family, int type, int protocol)
{
        int fd = swrap_socket(family, type, protocol);

        return fd;
}
3511
3512 /****************************************************************************
3513  *   SOCKETPAIR
3514  ***************************************************************************/
3515
/*
 * socketpair() replacement: let libc create the pair and, on success,
 * drop stale wrapper state for both returned descriptors.
 *
 * Returns the result of libc_socketpair().
 */
static int swrap_socketpair(int family, int type, int protocol, int sv[2])
{
        int rc = libc_socketpair(family, type, protocol, sv);

        if (rc == -1) {
                return rc;
        }

        /* Check if we have stale fds and remove them */
        swrap_remove_stale(sv[0]);
        swrap_remove_stale(sv[1]);

        return rc;
}
3528
/*
 * Exported socketpair symbol; forwards all arguments to
 * swrap_socketpair() and returns its result unchanged.
 */
int socketpair(int family, int type, int protocol, int sv[2])
{
        int rc = swrap_socketpair(family, type, protocol, sv);

        return rc;
}
3533
3534 /****************************************************************************
3535  *   TIMERFD_CREATE
3536  ***************************************************************************/
3537
3538 #ifdef HAVE_TIMERFD_CREATE
/*
 * timerfd_create() replacement: let libc create the descriptor and, on
 * success, drop stale wrapper state for it.
 *
 * Returns the result of libc_timerfd_create().
 */
static int swrap_timerfd_create(int clockid, int flags)
{
        int timer_fd = libc_timerfd_create(clockid, flags);

        if (timer_fd == -1) {
                return timer_fd;
        }

        /* Check if we have a stale fd and remove it */
        swrap_remove_stale(timer_fd);

        return timer_fd;
}
3550
/*
 * Exported timerfd_create symbol; forwards all arguments to
 * swrap_timerfd_create() and returns its result unchanged.
 */
int timerfd_create(int clockid, int flags)
{
        int fd = swrap_timerfd_create(clockid, flags);

        return fd;
}
3555 #endif
3556
3557 /****************************************************************************
3558  *   PIPE
3559  ***************************************************************************/
3560
/*
 * pipe() replacement: let libc create the pipe and, on success, drop
 * stale wrapper state for both ends.
 *
 * Returns the result of libc_pipe().
 */
static int swrap_pipe(int pipefd[2])
{
        int rc = libc_pipe(pipefd);

        if (rc == -1) {
                return rc;
        }

        /* Check if we have stale fds and remove them */
        swrap_remove_stale(pipefd[0]);
        swrap_remove_stale(pipefd[1]);

        return rc;
}
3573
/*
 * Exported pipe symbol; forwards to swrap_pipe() and returns its result
 * unchanged.
 */
int pipe(int pipefd[2])
{
        int rc = swrap_pipe(pipefd);

        return rc;
}
3578
3579 /****************************************************************************
3580  *   ACCEPT
3581  ***************************************************************************/
3582
3583 static int swrap_accept(int s,
3584                         struct sockaddr *addr,
3585                         socklen_t *addrlen,
3586                         int flags)
3587 {
3588         struct socket_info *parent_si, *child_si;
3589         struct socket_info new_si = { 0 };
3590         int fd;
3591         int idx;
3592         struct swrap_address un_addr = {
3593                 .sa_socklen = sizeof(struct sockaddr_un),
3594         };
3595         struct swrap_address un_my_addr = {
3596                 .sa_socklen = sizeof(struct sockaddr_un),
3597         };
3598         struct swrap_address in_addr = {
3599                 .sa_socklen = sizeof(struct sockaddr_storage),
3600         };
3601         struct swrap_address in_my_addr = {
3602                 .sa_socklen = sizeof(struct sockaddr_storage),
3603         };
3604         int ret;
3605
3606         parent_si = find_socket_info(s);
3607         if (!parent_si) {
3608 #ifdef HAVE_ACCEPT4
3609                 return libc_accept4(s, addr, addrlen, flags);
3610 #else
3611                 UNUSED(flags);
3612                 return libc_accept(s, addr, addrlen);
3613 #endif
3614         }
3615
3616
3617         /*
3618          * prevent parent_si from being altered / closed
3619          * while we read it
3620          */
3621         SWRAP_LOCK_SI(parent_si);
3622
3623         /*
3624          * assume out sockaddr have the same size as the in parent
3625          * socket family
3626          */
3627         in_addr.sa_socklen = socket_length(parent_si->family);
3628         if (in_addr.sa_socklen <= 0) {
3629                 SWRAP_UNLOCK_SI(parent_si);
3630                 errno = EINVAL;
3631                 return -1;
3632         }
3633
3634         SWRAP_UNLOCK_SI(parent_si);
3635
3636 #ifdef HAVE_ACCEPT4
3637         ret = libc_accept4(s, &un_addr.sa.s, &un_addr.sa_socklen, flags);
3638 #else
3639         UNUSED(flags);
3640         ret = libc_accept(s, &un_addr.sa.s, &un_addr.sa_socklen);
3641 #endif
3642         if (ret == -1) {
3643                 int saved_errno = errno;
3644                 if (saved_errno == ENOTSOCK) {
3645                         /* Remove stale fds */
3646                         swrap_remove_stale(s);
3647                 }
3648                 errno = saved_errno;
3649                 return ret;
3650         }
3651
3652         fd = ret;
3653
3654         /* Check if we have a stale fd and remove it */
3655         swrap_remove_stale(fd);
3656
3657         if (un_addr.sa.un.sun_path[0] == '\0') {
3658                 /*
3659                  * FreeBSD seems to have a problem where
3660                  * accept4() on the unix socket doesn't
3661                  * ECONNABORTED for already disconnected connections.
3662                  *
3663                  * Let's try libc_getpeername() to get the peer address
3664                  * as a fallback, but it'll likely return ENOTCONN,
3665                  * which we have to map to ECONNABORTED.
3666                  */
3667                 un_addr.sa_socklen = sizeof(struct sockaddr_un),
3668                 ret = libc_getpeername(fd, &un_addr.sa.s, &un_addr.sa_socklen);
3669                 if (ret == -1) {
3670                         int saved_errno = errno;
3671                         libc_close(fd);
3672                         if (saved_errno == ENOTCONN) {
3673                                 /*
3674                                  * If the connection is already disconnected
3675                                  * we should return ECONNABORTED.
3676                                  */
3677                                 saved_errno = ECONNABORTED;
3678                         }
3679                         errno = saved_errno;
3680                         return ret;
3681                 }
3682         }
3683
3684         ret = libc_getsockname(fd,
3685                                &un_my_addr.sa.s,
3686                                &un_my_addr.sa_socklen);
3687         if (ret == -1) {
3688                 int saved_errno = errno;
3689                 libc_close(fd);
3690                 if (saved_errno == ENOTCONN) {
3691                         /*
3692                          * If the connection is already disconnected
3693                          * we should return ECONNABORTED.
3694                          */
3695                         saved_errno = ECONNABORTED;
3696                 }
3697                 errno = saved_errno;
3698                 return ret;
3699         }
3700
3701         SWRAP_LOCK_SI(parent_si);
3702
3703         ret = sockaddr_convert_from_un(parent_si,
3704                                        &un_addr.sa.un,
3705                                        un_addr.sa_socklen,
3706                                        parent_si->family,
3707                                        &in_addr.sa.s,
3708                                        &in_addr.sa_socklen);
3709         if (ret == -1) {
3710                 int saved_errno = errno;
3711                 SWRAP_UNLOCK_SI(parent_si);
3712                 libc_close(fd);
3713                 errno = saved_errno;
3714                 return ret;
3715         }
3716
3717         child_si = &new_si;
3718
3719         child_si->family = parent_si->family;
3720         child_si->type = parent_si->type;
3721         child_si->protocol = parent_si->protocol;
3722         child_si->bound = 1;
3723         child_si->is_server = 1;
3724         child_si->connected = 1;
3725
3726         SWRAP_UNLOCK_SI(parent_si);
3727
3728         child_si->peername = (struct swrap_address) {
3729                 .sa_socklen = in_addr.sa_socklen,
3730         };
3731         memcpy(&child_si->peername.sa.ss, &in_addr.sa.ss, in_addr.sa_socklen);
3732
3733         if (addr != NULL && addrlen != NULL) {
3734                 size_t copy_len = MIN(*addrlen, in_addr.sa_socklen);
3735                 if (copy_len > 0) {
3736                         memcpy(addr, &in_addr.sa.ss, copy_len);
3737                 }
3738                 *addrlen = in_addr.sa_socklen;
3739         }
3740
3741         ret = sockaddr_convert_from_un(child_si,
3742                                        &un_my_addr.sa.un,
3743                                        un_my_addr.sa_socklen,
3744                                        child_si->family,
3745                                        &in_my_addr.sa.s,
3746                                        &in_my_addr.sa_socklen);
3747         if (ret == -1) {
3748                 int saved_errno = errno;
3749                 libc_close(fd);
3750                 errno = saved_errno;
3751                 return ret;
3752         }
3753
3754         SWRAP_LOG(SWRAP_LOG_TRACE,
3755                   "accept() path=%s, fd=%d",
3756                   un_my_addr.sa.un.sun_path, s);
3757
3758         child_si->myname = (struct swrap_address) {
3759                 .sa_socklen = in_my_addr.sa_socklen,
3760         };
3761         memcpy(&child_si->myname.sa.ss, &in_my_addr.sa.ss, in_my_addr.sa_socklen);
3762
3763         idx = swrap_create_socket(&new_si, fd);
3764         if (idx == -1) {
3765                 int saved_errno = errno;
3766                 libc_close(fd);
3767                 errno = saved_errno;
3768                 return -1;
3769         }
3770
3771         if (addr != NULL) {
3772                 struct socket_info *si = swrap_get_socket_info(idx);
3773
3774                 SWRAP_LOCK_SI(si);
3775                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_SEND, NULL, 0);
3776                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_RECV, NULL, 0);
3777                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_ACK, NULL, 0);
3778                 SWRAP_UNLOCK_SI(si);
3779         }
3780
3781         return fd;
3782 }
3783
#ifdef HAVE_ACCEPT4
/*
 * Public accept4() override.
 *
 * Hands the call to swrap_accept(), passing the caller's flags through
 * unchanged, and returns whatever swrap_accept() returns.
 */
int accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags)
{
	int rc;

	rc = swrap_accept(s, addr, addrlen, flags);

	return rc;
}
#endif /* HAVE_ACCEPT4 */
3790
/*
 * Public accept() override.
 *
 * Forwards to swrap_accept() with flags set to 0. The length argument is
 * declared as Psocklen_t when HAVE_ACCEPT_PSOCKLEN_T is defined, so it is
 * converted to a socklen_t pointer before being passed on.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int accept(int s, struct sockaddr *addr, Psocklen_t addrlen)
#else
int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_accept(s, addr, len_ptr, 0);
}
3799
3800 static int autobind_start_init;
3801 static int autobind_start;
3802
3803 /* using sendto() or connect() on an unbound socket would give the
3804    recipient no way to reply, as unlike UDP and TCP, a unix domain
3805    socket can't auto-assign ephemeral port numbers, so we need to
3806    assign it here.
3807    Note: this might change the family from ipv6 to ipv4
3808 */
3809 static int swrap_auto_bind(int fd, struct socket_info *si, int family)
3810 {
3811         struct swrap_address un_addr = {
3812                 .sa_socklen = sizeof(struct sockaddr_un),
3813         };
3814         int i;
3815         char type;
3816         int ret;
3817         int port;
3818         char *swrap_dir = NULL;
3819
3820         swrap_mutex_lock(&autobind_start_mutex);
3821
3822         if (autobind_start_init != 1) {
3823                 autobind_start_init = 1;
3824                 autobind_start = getpid();
3825                 autobind_start %= 50000;
3826                 autobind_start += 10000;
3827         }
3828
3829         un_addr.sa.un.sun_family = AF_UNIX;
3830
3831         switch (family) {
3832         case AF_INET: {
3833                 struct sockaddr_in in;
3834
3835                 switch (si->type) {
3836                 case SOCK_STREAM:
3837                         type = SOCKET_TYPE_CHAR_TCP;
3838                         break;
3839                 case SOCK_DGRAM:
3840                         type = SOCKET_TYPE_CHAR_UDP;
3841                         break;
3842                 default:
3843                         errno = ESOCKTNOSUPPORT;
3844                         ret = -1;
3845                         goto done;
3846                 }
3847
3848                 memset(&in, 0, sizeof(in));
3849                 in.sin_family = AF_INET;
3850                 in.sin_addr.s_addr = htonl(swrap_ipv4_iface(
3851                                            socket_wrapper_default_iface()));
3852
3853                 si->myname = (struct swrap_address) {
3854                         .sa_socklen = sizeof(in),
3855                 };
3856                 memcpy(&si->myname.sa.in, &in, si->myname.sa_socklen);
3857                 break;
3858         }
3859 #ifdef HAVE_IPV6
3860         case AF_INET6: {
3861                 struct sockaddr_in6 in6;
3862
3863                 if (si->family != family) {
3864                         errno = ENETUNREACH;
3865                         ret = -1;
3866                         goto done;
3867                 }
3868
3869                 switch (si->type) {
3870                 case SOCK_STREAM:
3871                         type = SOCKET_TYPE_CHAR_TCP_V6;
3872                         break;
3873                 case SOCK_DGRAM:
3874                         type = SOCKET_TYPE_CHAR_UDP_V6;
3875                         break;
3876                 default:
3877                         errno = ESOCKTNOSUPPORT;
3878                         ret = -1;
3879                         goto done;
3880                 }
3881
3882                 memset(&in6, 0, sizeof(in6));
3883                 in6.sin6_family = AF_INET6;
3884                 in6.sin6_addr = *swrap_ipv6();
3885                 in6.sin6_addr.s6_addr[15] = socket_wrapper_default_iface();
3886
3887                 si->myname = (struct swrap_address) {
3888                         .sa_socklen = sizeof(in6),
3889                 };
3890                 memcpy(&si->myname.sa.in6, &in6, si->myname.sa_socklen);
3891                 break;
3892         }
3893 #endif
3894         default:
3895                 errno = ESOCKTNOSUPPORT;
3896                 ret = -1;
3897                 goto done;
3898         }
3899
3900         if (autobind_start > 60000) {
3901                 autobind_start = 10000;
3902         }
3903
3904         swrap_dir = socket_wrapper_dir();
3905         if (swrap_dir == NULL) {
3906                 errno = EINVAL;
3907                 ret = -1;
3908                 goto done;
3909         }
3910
3911         for (i = 0; i < SOCKET_MAX_SOCKETS; i++) {
3912                 port = autobind_start + i;
3913                 swrap_un_path(&un_addr.sa.un,
3914                               swrap_dir,
3915                               type,
3916                               socket_wrapper_default_iface(),
3917                               port);
3918
3919                 ret = libc_bind(fd, &un_addr.sa.s, un_addr.sa_socklen);
3920                 if (ret == -1) {
3921                         if (errno == EALREADY || errno == EADDRINUSE) {
3922                                 continue;
3923                         }
3924                         goto done;
3925                 }
3926
3927                 si->un_addr = un_addr.sa.un;
3928
3929                 si->bound = 1;
3930                 autobind_start = port + 1;
3931                 break;
3932         }
3933         if (i == SOCKET_MAX_SOCKETS) {
3934                 SWRAP_LOG(SWRAP_LOG_ERROR, "Too many open unix sockets (%u) for "
3935                                            "interface "SOCKET_FORMAT,
3936                                            SOCKET_MAX_SOCKETS,
3937                                            type,
3938                                            socket_wrapper_default_iface(),
3939                                            0);
3940                 errno = ENFILE;
3941                 ret = -1;
3942                 goto done;
3943         }
3944
3945         si->family = family;
3946         set_port(si->family, port, &si->myname);
3947
3948         ret = 0;
3949
3950 done:
3951         SAFE_FREE(swrap_dir);
3952         swrap_mutex_unlock(&autobind_start_mutex);
3953         return ret;
3954 }
3955
3956 /****************************************************************************
3957  *   CONNECT
3958  ***************************************************************************/
3959
3960 static int swrap_connect(int s, const struct sockaddr *serv_addr,
3961                          socklen_t addrlen)
3962 {
3963         int ret;
3964         struct swrap_address un_addr = {
3965                 .sa_socklen = sizeof(struct sockaddr_un),
3966         };
3967         struct socket_info *si = find_socket_info(s);
3968         int bcast = 0;
3969
3970         if (!si) {
3971                 return libc_connect(s, serv_addr, addrlen);
3972         }
3973
3974         SWRAP_LOCK_SI(si);
3975
3976         if (si->bound == 0) {
3977                 ret = swrap_auto_bind(s, si, serv_addr->sa_family);
3978                 if (ret == -1) {
3979                         goto done;
3980                 }
3981         }
3982
3983         if (si->family != serv_addr->sa_family) {
3984                 SWRAP_LOG(SWRAP_LOG_ERROR,
3985                           "called for fd=%d (family=%d) called with invalid family=%d",
3986                           s, si->family, serv_addr->sa_family);
3987                 errno = EINVAL;
3988                 ret = -1;
3989                 goto done;
3990         }
3991
3992         ret = sockaddr_convert_to_un(si, serv_addr,
3993                                      addrlen, &un_addr.sa.un, 0, &bcast);
3994         if (ret == -1) {
3995                 goto done;
3996         }
3997
3998         if (bcast) {
3999                 errno = ENETUNREACH;
4000                 ret = -1;
4001                 goto done;
4002         }
4003
4004         if (si->type == SOCK_DGRAM) {
4005                 si->defer_connect = 1;
4006                 ret = 0;
4007         } else {
4008                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_SEND, NULL, 0);
4009
4010                 ret = libc_connect(s,
4011                                    &un_addr.sa.s,
4012                                    un_addr.sa_socklen);
4013         }
4014
4015         SWRAP_LOG(SWRAP_LOG_TRACE,
4016                   "connect() path=%s, fd=%d",
4017                   un_addr.sa.un.sun_path, s);
4018
4019
4020         /* to give better errors */
4021         if (ret == -1 && errno == ENOENT) {
4022                 errno = EHOSTUNREACH;
4023         }
4024
4025         if (ret == 0) {
4026                 si->peername = (struct swrap_address) {
4027                         .sa_socklen = addrlen,
4028                 };
4029
4030                 memcpy(&si->peername.sa.ss, serv_addr, addrlen);
4031                 si->connected = 1;
4032
4033                 /*
4034                  * When we connect() on a socket than we have to bind the
4035                  * outgoing connection on the interface we use for the
4036                  * transport. We already bound it on the right interface
4037                  * but here we have to update the name so getsockname()
4038                  * returns correct information.
4039                  */
4040                 if (si->bindname.sa_socklen > 0) {
4041                         si->myname = (struct swrap_address) {
4042                                 .sa_socklen = si->bindname.sa_socklen,
4043                         };
4044
4045                         memcpy(&si->myname.sa.ss,
4046                                &si->bindname.sa.ss,
4047                                si->bindname.sa_socklen);
4048
4049                         /* Cleanup bindname */
4050                         si->bindname = (struct swrap_address) {
4051                                 .sa_socklen = 0,
4052                         };
4053                 }
4054
4055                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_RECV, NULL, 0);
4056                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_ACK, NULL, 0);
4057         } else {
4058                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_UNREACH, NULL, 0);
4059         }
4060
4061 done:
4062         SWRAP_UNLOCK_SI(si);
4063         return ret;
4064 }
4065
/*
 * Public connect() override; all work is done by swrap_connect().
 */
int connect(int s, const struct sockaddr *serv_addr, socklen_t addrlen)
{
	int rc;

	rc = swrap_connect(s, serv_addr, addrlen);

	return rc;
}
4070
4071 /****************************************************************************
4072  *   BIND
4073  ***************************************************************************/
4074
4075 static int swrap_bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
4076 {
4077         int ret;
4078         struct swrap_address un_addr = {
4079                 .sa_socklen = sizeof(struct sockaddr_un),
4080         };
4081         struct socket_info *si = find_socket_info(s);
4082         int bind_error = 0;
4083 #if 0 /* FIXME */
4084         bool in_use;
4085 #endif
4086
4087         if (!si) {
4088                 return libc_bind(s, myaddr, addrlen);
4089         }
4090
4091         SWRAP_LOCK_SI(si);
4092
4093         switch (si->family) {
4094         case AF_INET: {
4095                 const struct sockaddr_in *sin;
4096                 if (addrlen < sizeof(struct sockaddr_in)) {
4097                         bind_error = EINVAL;
4098                         break;
4099                 }
4100
4101                 sin = (const struct sockaddr_in *)(const void *)myaddr;
4102
4103                 if (sin->sin_family != AF_INET) {
4104                         bind_error = EAFNOSUPPORT;
4105                 }
4106
4107                 /* special case for AF_UNSPEC */
4108                 if (sin->sin_family == AF_UNSPEC &&
4109                     (sin->sin_addr.s_addr == htonl(INADDR_ANY)))
4110                 {
4111                         bind_error = 0;
4112                 }
4113
4114                 break;
4115         }
4116 #ifdef HAVE_IPV6
4117         case AF_INET6: {
4118                 const struct sockaddr_in6 *sin6;
4119                 if (addrlen < sizeof(struct sockaddr_in6)) {
4120                         bind_error = EINVAL;
4121                         break;
4122                 }
4123
4124                 sin6 = (const struct sockaddr_in6 *)(const void *)myaddr;
4125
4126                 if (sin6->sin6_family != AF_INET6) {
4127                         bind_error = EAFNOSUPPORT;
4128                 }
4129
4130                 break;
4131         }
4132 #endif
4133         default:
4134                 bind_error = EINVAL;
4135                 break;
4136         }
4137
4138         if (bind_error != 0) {
4139                 errno = bind_error;
4140                 ret = -1;
4141                 goto out;
4142         }
4143
4144 #if 0 /* FIXME */
4145         in_use = check_addr_port_in_use(myaddr, addrlen);
4146         if (in_use) {
4147                 errno = EADDRINUSE;
4148                 ret = -1;
4149                 goto out;
4150         }
4151 #endif
4152
4153         si->myname.sa_socklen = addrlen;
4154         memcpy(&si->myname.sa.ss, myaddr, addrlen);
4155
4156         ret = sockaddr_convert_to_un(si,
4157                                      myaddr,
4158                                      addrlen,
4159                                      &un_addr.sa.un,
4160                                      1,
4161                                      &si->bcast);
4162         if (ret == -1) {
4163                 goto out;
4164         }
4165
4166         unlink(un_addr.sa.un.sun_path);
4167
4168         ret = libc_bind(s, &un_addr.sa.s, un_addr.sa_socklen);
4169
4170         SWRAP_LOG(SWRAP_LOG_TRACE,
4171                   "bind() path=%s, fd=%d",
4172                   un_addr.sa.un.sun_path, s);
4173
4174         if (ret == 0) {
4175                 si->bound = 1;
4176         }
4177
4178 out:
4179         SWRAP_UNLOCK_SI(si);
4180
4181         return ret;
4182 }
4183
/*
 * Public bind() override; all work is done by swrap_bind().
 */
int bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
{
	int rc;

	rc = swrap_bind(s, myaddr, addrlen);

	return rc;
}
4188
4189 /****************************************************************************
4190  *   BINDRESVPORT
4191  ***************************************************************************/
4192
4193 #ifdef HAVE_BINDRESVPORT
4194 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen);
4195
4196 static int swrap_bindresvport_sa(int sd, struct sockaddr *sa)
4197 {
4198         struct swrap_address myaddr = {
4199                 .sa_socklen = sizeof(struct sockaddr_storage),
4200         };
4201         socklen_t salen;
4202         static uint16_t port;
4203         uint16_t i;
4204         int rc = -1;
4205         int af;
4206
4207 #define SWRAP_STARTPORT 600
4208 #define SWRAP_ENDPORT (IPPORT_RESERVED - 1)
4209 #define SWRAP_NPORTS (SWRAP_ENDPORT - SWRAP_STARTPORT + 1)
4210
4211         if (port == 0) {
4212                 port = (getpid() % SWRAP_NPORTS) + SWRAP_STARTPORT;
4213         }
4214
4215         if (sa == NULL) {
4216                 salen = myaddr.sa_socklen;
4217                 sa = &myaddr.sa.s;
4218
4219                 rc = swrap_getsockname(sd, &myaddr.sa.s, &salen);
4220                 if (rc < 0) {
4221                         return -1;
4222                 }
4223
4224                 af = sa->sa_family;
4225                 memset(&myaddr.sa.ss, 0, salen);
4226         } else {
4227                 af = sa->sa_family;
4228         }
4229
4230         for (i = 0; i < SWRAP_NPORTS; i++, port++) {
4231                 switch(af) {
4232                 case AF_INET: {
4233                         struct sockaddr_in *sinp = (struct sockaddr_in *)(void *)sa;
4234
4235                         salen = sizeof(struct sockaddr_in);
4236                         sinp->sin_port = htons(port);
4237                         break;
4238                 }
4239                 case AF_INET6: {
4240                         struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)(void *)sa;
4241
4242                         salen = sizeof(struct sockaddr_in6);
4243                         sin6p->sin6_port = htons(port);
4244                         break;
4245                 }
4246                 default:
4247                         errno = EAFNOSUPPORT;
4248                         return -1;
4249                 }
4250                 sa->sa_family = af;
4251
4252                 if (port > SWRAP_ENDPORT) {
4253                         port = SWRAP_STARTPORT;
4254                 }
4255
4256                 rc = swrap_bind(sd, (struct sockaddr *)sa, salen);
4257                 if (rc == 0 || errno != EADDRINUSE) {
4258                         break;
4259                 }
4260         }
4261
4262         return rc;
4263 }
4264
/*
 * Public bindresvport() override; forwards to swrap_bindresvport_sa()
 * with the sockaddr_in pointer viewed as a generic sockaddr.
 */
int bindresvport(int sockfd, struct sockaddr_in *sinp)
{
	struct sockaddr *sa = (struct sockaddr *)sinp;

	return swrap_bindresvport_sa(sockfd, sa);
}
4269 #endif
4270
4271 /****************************************************************************
4272  *   LISTEN
4273  ***************************************************************************/
4274
4275 static int swrap_listen(int s, int backlog)
4276 {
4277         int ret;
4278         struct socket_info *si = find_socket_info(s);
4279
4280         if (!si) {
4281                 return libc_listen(s, backlog);
4282         }
4283
4284         SWRAP_LOCK_SI(si);
4285
4286         if (si->bound == 0) {
4287                 ret = swrap_auto_bind(s, si, si->family);
4288                 if (ret == -1) {
4289                         errno = EADDRINUSE;
4290                         goto out;
4291                 }
4292         }
4293
4294         ret = libc_listen(s, backlog);
4295         if (ret == 0) {
4296                 si->listening = 1;
4297         }
4298
4299 out:
4300         SWRAP_UNLOCK_SI(si);
4301
4302         return ret;
4303 }
4304
/*
 * Public listen() override; all work is done by swrap_listen().
 */
int listen(int s, int backlog)
{
	int rc;

	rc = swrap_listen(s, backlog);

	return rc;
}
4309
4310 /****************************************************************************
4311  *   FOPEN
4312  ***************************************************************************/
4313
/*
 * fopen() implementation: opens the stream via libc_fopen() and, on
 * success, runs swrap_remove_stale() on the stream's descriptor.
 * Returns the stream, or NULL when libc_fopen() failed.
 */
static FILE *swrap_fopen(const char *name, const char *mode)
{
	FILE *stream = libc_fopen(name, mode);

	if (stream == NULL) {
		return NULL;
	}

	swrap_remove_stale(fileno(stream));

	return stream;
}
4327
/*
 * Public fopen() override; all work is done by swrap_fopen().
 */
FILE *fopen(const char *name, const char *mode)
{
	FILE *stream;

	stream = swrap_fopen(name, mode);

	return stream;
}
4332
4333 /****************************************************************************
4334  *   FOPEN64
4335  ***************************************************************************/
4336
4337 #ifdef HAVE_FOPEN64
/*
 * fopen64() implementation: opens the stream via libc_fopen64() and, on
 * success, runs swrap_remove_stale() on the stream's descriptor.
 * Returns the stream, or NULL when libc_fopen64() failed.
 */
static FILE *swrap_fopen64(const char *name, const char *mode)
{
	FILE *stream = libc_fopen64(name, mode);

	if (stream == NULL) {
		return NULL;
	}

	swrap_remove_stale(fileno(stream));

	return stream;
}
4351
/*
 * Public fopen64() override; all work is done by swrap_fopen64().
 */
FILE *fopen64(const char *name, const char *mode)
{
	FILE *stream;

	stream = swrap_fopen64(name, mode);

	return stream;
}
4356 #endif /* HAVE_FOPEN64 */
4357
4358 /****************************************************************************
4359  *   OPEN
4360  ***************************************************************************/
4361
/*
 * open() implementation taking the optional arguments as a va_list.
 *
 * Opens the file via libc_vopen(). Descriptors can be closed in ways the
 * wrapper cannot intercept (libc-internal code paths, direct syscalls),
 * so a freshly returned descriptor is passed to swrap_remove_stale() to
 * recover when that has happened.
 *
 * Returns the descriptor, or -1 when libc_vopen() failed.
 */
static int swrap_vopen(const char *pathname, int flags, va_list ap)
{
	int fd = libc_vopen(pathname, flags, ap);

	if (fd == -1) {
		return fd;
	}

	swrap_remove_stale(fd);

	return fd;
}
4378
/*
 * Public open() override: packs the variadic arguments into a va_list
 * and hands the call to swrap_vopen().
 */
int open(const char *pathname, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopen(pathname, flags, args);
	va_end(args);

	return result;
}
4390
4391 /****************************************************************************
4392  *   OPEN64
4393  ***************************************************************************/
4394
4395 #ifdef HAVE_OPEN64
/*
 * open64() implementation taking the optional arguments as a va_list.
 *
 * Opens the file via libc_vopen64(). Descriptors can be closed in ways
 * the wrapper cannot intercept (libc-internal code paths, direct
 * syscalls), so a freshly returned descriptor is passed to
 * swrap_remove_stale() to recover when that has happened.
 *
 * Returns the descriptor, or -1 when libc_vopen64() failed.
 */
static int swrap_vopen64(const char *pathname, int flags, va_list ap)
{
	int fd = libc_vopen64(pathname, flags, ap);

	if (fd == -1) {
		return fd;
	}

	swrap_remove_stale(fd);

	return fd;
}
4412
/*
 * Public open64() override: packs the variadic arguments into a va_list
 * and hands the call to swrap_vopen64().
 */
int open64(const char *pathname, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopen64(pathname, flags, args);
	va_end(args);

	return result;
}
4424 #endif /* HAVE_OPEN64 */
4425
4426 /****************************************************************************
4427  *   OPENAT
4428  ***************************************************************************/
4429
/*
 * openat() implementation taking the optional arguments as a va_list.
 *
 * Opens the file via libc_vopenat(). Descriptors can be closed in ways
 * the wrapper cannot intercept (libc-internal code paths, direct
 * syscalls), so a freshly returned descriptor is passed to
 * swrap_remove_stale() to recover when that has happened.
 *
 * Returns the descriptor, or -1 when libc_vopenat() failed.
 */
static int swrap_vopenat(int dirfd, const char *path, int flags, va_list ap)
{
	int fd = libc_vopenat(dirfd, path, flags, ap);

	if (fd == -1) {
		return fd;
	}

	swrap_remove_stale(fd);

	return fd;
}
4447
/*
 * Public openat() override: packs the variadic arguments into a va_list
 * and hands the call to swrap_vopenat().
 */
int openat(int dirfd, const char *path, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopenat(dirfd, path, flags, args);
	va_end(args);

	return result;
}
4459
4460 /****************************************************************************
4461  *   GETPEERNAME
4462  ***************************************************************************/
4463
4464 static int swrap_getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
4465 {
4466         struct socket_info *si = find_socket_info(s);
4467         socklen_t len;
4468         int ret = -1;
4469
4470         if (!si) {
4471                 return libc_getpeername(s, name, addrlen);
4472         }
4473
4474         SWRAP_LOCK_SI(si);
4475
4476         if (si->peername.sa_socklen == 0)
4477         {
4478                 errno = ENOTCONN;
4479                 goto out;
4480         }
4481
4482         len = MIN(*addrlen, si->peername.sa_socklen);
4483         if (len == 0) {
4484                 ret = 0;
4485                 goto out;
4486         }
4487
4488         memcpy(name, &si->peername.sa.ss, len);
4489         *addrlen = si->peername.sa_socklen;
4490
4491         ret = 0;
4492 out:
4493         SWRAP_UNLOCK_SI(si);
4494
4495         return ret;
4496 }
4497
/*
 * Public getpeername() override.
 *
 * The length argument is declared as Psocklen_t when
 * HAVE_ACCEPT_PSOCKLEN_T is defined, so it is converted to a socklen_t
 * pointer before the call is handed to swrap_getpeername().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getpeername(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_getpeername(s, name, len_ptr);
}
4506
4507 /****************************************************************************
4508  *   GETSOCKNAME
4509  ***************************************************************************/
4510
4511 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
4512 {
4513         struct socket_info *si = find_socket_info(s);
4514         socklen_t len;
4515         int ret = -1;
4516
4517         if (!si) {
4518                 return libc_getsockname(s, name, addrlen);
4519         }
4520
4521         SWRAP_LOCK_SI(si);
4522
4523         len = MIN(*addrlen, si->myname.sa_socklen);
4524         if (len == 0) {
4525                 ret = 0;
4526                 goto out;
4527         }
4528
4529         memcpy(name, &si->myname.sa.ss, len);
4530         *addrlen = si->myname.sa_socklen;
4531
4532         ret = 0;
4533 out:
4534         SWRAP_UNLOCK_SI(si);
4535
4536         return ret;
4537 }
4538
/*
 * Public getsockname() override.
 *
 * The length argument is declared as Psocklen_t when
 * HAVE_ACCEPT_PSOCKLEN_T is defined, so it is converted to a socklen_t
 * pointer before the call is handed to swrap_getsockname().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockname(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_getsockname(s, name, len_ptr);
}
4547
4548 /****************************************************************************
4549  *   GETSOCKOPT
4550  ***************************************************************************/
4551
4552 #ifndef SO_PROTOCOL
4553 # ifdef SO_PROTOTYPE /* The Solaris name */
4554 #  define SO_PROTOCOL SO_PROTOTYPE
4555 # endif /* SO_PROTOTYPE */
4556 #endif /* SO_PROTOCOL */
4557
4558 static int swrap_getsockopt(int s, int level, int optname,
4559                             void *optval, socklen_t *optlen)
4560 {
4561         struct socket_info *si = find_socket_info(s);
4562         int ret;
4563
4564         if (!si) {
4565                 return libc_getsockopt(s,
4566                                        level,
4567                                        optname,
4568                                        optval,
4569                                        optlen);
4570         }
4571
4572         SWRAP_LOCK_SI(si);
4573
4574         if (level == SOL_SOCKET) {
4575                 switch (optname) {
4576 #ifdef SO_DOMAIN
4577                 case SO_DOMAIN:
4578                         if (optval == NULL || optlen == NULL ||
4579                             *optlen < (socklen_t)sizeof(int)) {
4580                                 errno = EINVAL;
4581                                 ret = -1;
4582                                 goto done;
4583                         }
4584
4585                         *optlen = sizeof(int);
4586                         *(int *)optval = si->family;
4587                         ret = 0;
4588                         goto done;
4589 #endif /* SO_DOMAIN */
4590
4591 #ifdef SO_PROTOCOL
4592                 case SO_PROTOCOL:
4593                         if (optval == NULL || optlen == NULL ||
4594                             *optlen < (socklen_t)sizeof(int)) {
4595                                 errno = EINVAL;
4596                                 ret = -1;
4597                                 goto done;
4598                         }
4599
4600                         *optlen = sizeof(int);
4601                         *(int *)optval = si->protocol;
4602                         ret = 0;
4603                         goto done;
4604 #endif /* SO_PROTOCOL */
4605                 case SO_TYPE:
4606                         if (optval == NULL || optlen == NULL ||
4607                             *optlen < (socklen_t)sizeof(int)) {
4608                                 errno = EINVAL;
4609                                 ret = -1;
4610                                 goto done;
4611                         }
4612
4613                         *optlen = sizeof(int);
4614                         *(int *)optval = si->type;
4615                         ret = 0;
4616                         goto done;
4617                 default:
4618                         ret = libc_getsockopt(s,
4619                                               level,
4620                                               optname,
4621                                               optval,
4622                                               optlen);
4623                         goto done;
4624                 }
4625         } else if (level == IPPROTO_TCP) {
4626                 switch (optname) {
4627 #ifdef TCP_NODELAY
4628                 case TCP_NODELAY:
4629                         /*
4630                          * This enables sending packets directly out over TCP.
4631                          * As a unix socket is doing that any way, report it as
4632                          * enabled.
4633                          */
4634                         if (optval == NULL || optlen == NULL ||
4635                             *optlen < (socklen_t)sizeof(int)) {
4636                                 errno = EINVAL;
4637                                 ret = -1;
4638                                 goto done;
4639                         }
4640
4641                         *optlen = sizeof(int);
4642                         *(int *)optval = si->tcp_nodelay;
4643
4644                         ret = 0;
4645                         goto done;
4646 #endif /* TCP_NODELAY */
4647 #ifdef TCP_INFO
4648                 case TCP_INFO: {
4649                         struct tcp_info info;
4650                         socklen_t ilen = sizeof(info);
4651
4652 #ifdef HAVE_NETINET_TCP_FSM_H
4653 /* This is FreeBSD */
4654 # define __TCP_LISTEN TCPS_LISTEN
4655 # define __TCP_ESTABLISHED TCPS_ESTABLISHED
4656 # define __TCP_CLOSE TCPS_CLOSED
4657 #else
4658 /* This is Linux */
4659 # define __TCP_LISTEN TCP_LISTEN
4660 # define __TCP_ESTABLISHED TCP_ESTABLISHED
4661 # define __TCP_CLOSE TCP_CLOSE
4662 #endif
4663
4664                         ZERO_STRUCT(info);
4665                         if (si->listening) {
4666                                 info.tcpi_state = __TCP_LISTEN;
4667                         } else if (si->connected) {
4668                                 /*
4669                                  * For now we just fake a few values
4670                                  * supported both by FreeBSD and Linux
4671                                  */
4672                                 info.tcpi_state = __TCP_ESTABLISHED;
4673                                 info.tcpi_rto = 200000;  /* 200 msec */
4674                                 info.tcpi_rtt = 5000;    /* 5 msec */
4675                                 info.tcpi_rttvar = 5000; /* 5 msec */
4676                         } else {
4677                                 info.tcpi_state = __TCP_CLOSE;
4678                                 info.tcpi_rto = 1000000;  /* 1 sec */
4679                                 info.tcpi_rtt = 0;
4680                                 info.tcpi_rttvar = 250000; /* 250 msec */
4681                         }
4682
4683                         if (optval == NULL || optlen == NULL ||
4684                             *optlen < (socklen_t)ilen) {
4685                                 errno = EINVAL;
4686                                 ret = -1;
4687                                 goto done;
4688                         }
4689
4690                         *optlen = ilen;
4691                         memcpy(optval, &info, ilen);
4692
4693                         ret = 0;
4694                         goto done;
4695                 }
4696 #endif /* TCP_INFO */
4697                 default:
4698                         break;
4699                 }
4700         }
4701
4702         errno = ENOPROTOOPT;
4703         ret = -1;
4704
4705 done:
4706         SWRAP_UNLOCK_SI(si);
4707         return ret;
4708 }
4709
/*
 * Exported getsockopt() entry point.
 *
 * The type of the length argument depends on the platform
 * (HAVE_ACCEPT_PSOCKLEN_T); either way it is handed to
 * swrap_getsockopt() as a plain socklen_t pointer.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockopt(int s, int level, int optname, void *optval, Psocklen_t optlen)
#else
int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)optlen;

        return swrap_getsockopt(s, level, optname, optval, len_ptr);
}
4718
4719 /****************************************************************************
4720  *   SETSOCKOPT
4721  ***************************************************************************/
4722
4723 static int swrap_setsockopt(int s, int level, int optname,
4724                             const void *optval, socklen_t optlen)
4725 {
4726         struct socket_info *si = find_socket_info(s);
4727         int ret;
4728
4729         if (!si) {
4730                 return libc_setsockopt(s,
4731                                        level,
4732                                        optname,
4733                                        optval,
4734                                        optlen);
4735         }
4736
4737         if (level == SOL_SOCKET) {
4738                 return libc_setsockopt(s,
4739                                        level,
4740                                        optname,
4741                                        optval,
4742                                        optlen);
4743         }
4744
4745         SWRAP_LOCK_SI(si);
4746
4747         if (level == IPPROTO_TCP) {
4748                 switch (optname) {
4749 #ifdef TCP_NODELAY
4750                 case TCP_NODELAY: {
4751                         int i;
4752
4753                         /*
4754                          * This enables sending packets directly out over TCP.
4755                          * A unix socket is doing that any way.
4756                          */
4757                         if (optval == NULL || optlen == 0 ||
4758                             optlen < (socklen_t)sizeof(int)) {
4759                                 errno = EINVAL;
4760                                 ret = -1;
4761                                 goto done;
4762                         }
4763
4764                         i = *discard_const_p(int, optval);
4765                         if (i != 0 && i != 1) {
4766                                 errno = EINVAL;
4767                                 ret = -1;
4768                                 goto done;
4769                         }
4770                         si->tcp_nodelay = i;
4771
4772                         ret = 0;
4773                         goto done;
4774                 }
4775 #endif /* TCP_NODELAY */
4776                 default:
4777                         break;
4778                 }
4779         }
4780
4781         switch (si->family) {
4782         case AF_INET:
4783                 if (level == IPPROTO_IP) {
4784 #ifdef IP_PKTINFO
4785                         if (optname == IP_PKTINFO) {
4786                                 si->pktinfo = AF_INET;
4787                         }
4788 #endif /* IP_PKTINFO */
4789                 }
4790                 ret = 0;
4791                 goto done;
4792 #ifdef HAVE_IPV6
4793         case AF_INET6:
4794                 if (level == IPPROTO_IPV6) {
4795 #ifdef IPV6_RECVPKTINFO
4796                         if (optname == IPV6_RECVPKTINFO) {
4797                                 si->pktinfo = AF_INET6;
4798                         }
4799 #endif /* IPV6_PKTINFO */
4800                 }
4801                 ret = 0;
4802                 goto done;
4803 #endif
4804         default:
4805                 errno = ENOPROTOOPT;
4806                 ret = -1;
4807                 goto done;
4808         }
4809
4810 done:
4811         SWRAP_UNLOCK_SI(si);
4812         return ret;
4813 }
4814
/*
 * Exported setsockopt() entry point; all work is done by
 * swrap_setsockopt().
 */
int setsockopt(int s, int level, int optname,
               const void *optval, socklen_t optlen)
{
        int rc = swrap_setsockopt(s, level, optname, optval, optlen);

        return rc;
}
4820
4821 /****************************************************************************
4822  *   IOCTL
4823  ***************************************************************************/
4824
4825 static int swrap_vioctl(int s, unsigned long int r, va_list va)
4826 {
4827         struct socket_info *si = find_socket_info(s);
4828         va_list ap;
4829         int *value_ptr = NULL;
4830         int rc;
4831
4832         if (!si) {
4833                 return libc_vioctl(s, r, va);
4834         }
4835
4836         SWRAP_LOCK_SI(si);
4837
4838         va_copy(ap, va);
4839
4840         rc = libc_vioctl(s, r, va);
4841
4842         switch (r) {
4843         case FIONREAD:
4844                 if (rc == 0) {
4845                         value_ptr = ((int *)va_arg(ap, int *));
4846                 }
4847
4848                 if (rc == -1 && errno != EAGAIN && errno != ENOBUFS) {
4849                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4850                 } else if (value_ptr != NULL && *value_ptr == 0) { /* END OF FILE */
4851                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4852                 }
4853                 break;
4854 #ifdef FIONWRITE
4855         case FIONWRITE:
4856                 /* this is FreeBSD */
4857                 FALL_THROUGH; /* to TIOCOUTQ */
4858 #endif /* FIONWRITE */
4859         case TIOCOUTQ: /* same as SIOCOUTQ on Linux */
4860                 /*
4861                  * This may return more bytes then the application
4862                  * sent into the socket, for tcp it should
4863                  * return the number of unacked bytes.
4864                  *
4865                  * On AF_UNIX, all bytes are immediately acked!
4866                  */
4867                 if (rc == 0) {
4868                         value_ptr = ((int *)va_arg(ap, int *));
4869                         *value_ptr = 0;
4870                 }
4871                 break;
4872         }
4873
4874         va_end(ap);
4875
4876         SWRAP_UNLOCK_SI(si);
4877         return rc;
4878 }
4879
/*
 * Exported ioctl() entry point.
 *
 * Captures the variadic arguments in a va_list and lets swrap_vioctl()
 * do the work. The request is widened to unsigned long int because the
 * prototype differs between platforms (HAVE_IOCTL_INT).
 */
#ifdef HAVE_IOCTL_INT
int ioctl(int s, int r, ...)
#else
int ioctl(int s, unsigned long int r, ...)
#endif
{
        va_list args;
        int result;

        va_start(args, r);
        result = swrap_vioctl(s, (unsigned long int)r, args);
        va_end(args);

        return result;
}
4897
4898 /*****************
4899  * CMSG
4900  *****************/
4901
4902 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
4903
/*
 * Fallback for platforms whose headers do not provide CMSG_ALIGN:
 * use _ALIGN where it exists, otherwise round len up to the next
 * multiple of sizeof(size_t).
 */
#ifndef CMSG_ALIGN
# ifdef _ALIGN /* BSD */
#define CMSG_ALIGN _ALIGN
# else
#define CMSG_ALIGN(len) (((len) + sizeof(size_t) - 1) & ~(sizeof(size_t) - 1))
# endif /* _ALIGN */
#endif /* CMSG_ALIGN */
4911
/**
 * @brief Append one cmsghdr to the control buffer of a msghdr.
 *
 * This is a generic helper that can add any kind of cmsghdr. It writes
 * the element at msg->msg_control and then moves msg->msg_control and
 * msg->msg_controllen past it, so the next element can be appended
 * right behind. Because the msghdr is modified, pass an intermediate
 * msghdr and not the one handed in by the client.
 *
 * If the remaining control buffer is too small for the element, only
 * as many bytes as fit are written and MSG_CTRUNC is set in
 * msg->msg_flags.
 *
 * @param[in,out] msg   The msghdr to which to add the cmsg.
 *
 * @param[in]  level    The cmsg level to set.
 *
 * @param[in]  type     The cmsg type to set.
 *
 * @param[in]  data     The cmsg data to set.
 *
 * @param[in]  len      The length of the data to set.
 */
static void swrap_msghdr_add_cmsghdr(struct msghdr *msg,
                                     int level,
                                     int type,
                                     const void *data,
                                     size_t len)
{
        size_t full_space = CMSG_SPACE(len);
        size_t copy_len = CMSG_LEN(len);
        size_t advance = full_space;
        size_t avail = msg->msg_controllen;
        uint8_t staging[full_space];
        void *staging_ptr = (void *)staging;
        struct cmsghdr *hdr = (struct cmsghdr *)staging_ptr;
        uint8_t *dst = (uint8_t *)msg->msg_control;

        memset(staging, 0, full_space);

        /* Clamp both lengths to what the caller's buffer can take. */
        if (avail < copy_len) {
                copy_len = avail;
                msg->msg_flags |= MSG_CTRUNC;
        }
        if (avail < advance) {
                advance = avail;
        }

        /*
         * Assemble the complete element in the staging buffer first,
         * truncation then is just a shorter copy.
         */
        hdr->cmsg_len = copy_len;
        hdr->cmsg_level = level;
        hdr->cmsg_type = type;
        memcpy(CMSG_DATA(hdr), data, len);

        /*
         * Hand out the possibly truncated element: copy_len bytes are
         * written, but advance bytes are consumed, which leaves any
         * trailing padding uninitialized.
         */
        memcpy(dst, hdr, copy_len);
        msg->msg_control = dst + advance;
        msg->msg_controllen = avail - advance;
}
4977
4978 static int swrap_msghdr_add_pktinfo(struct socket_info *si,
4979                                     struct msghdr *msg)
4980 {
4981         /* Add packet info */
4982         switch (si->pktinfo) {
4983 #if defined(IP_PKTINFO) && (defined(HAVE_STRUCT_IN_PKTINFO) || defined(IP_RECVDSTADDR))
4984         case AF_INET: {
4985                 struct sockaddr_in *sin;
4986 #if defined(HAVE_STRUCT_IN_PKTINFO)
4987                 struct in_pktinfo pkt;
4988 #elif defined(IP_RECVDSTADDR)
4989                 struct in_addr pkt;
4990 #endif
4991
4992                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in)) {
4993                         sin = &si->bindname.sa.in;
4994                 } else {
4995                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in)) {
4996                                 return 0;
4997                         }
4998                         sin = &si->myname.sa.in;
4999                 }
5000
5001                 ZERO_STRUCT(pkt);
5002
5003 #if defined(HAVE_STRUCT_IN_PKTINFO)
5004                 pkt.ipi_ifindex = socket_wrapper_default_iface();
5005                 pkt.ipi_addr.s_addr = sin->sin_addr.s_addr;
5006 #elif defined(IP_RECVDSTADDR)
5007                 pkt = sin->sin_addr;
5008 #endif
5009
5010                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IP, IP_PKTINFO,
5011                                          &pkt, sizeof(pkt));
5012
5013                 break;
5014         }
5015 #endif /* IP_PKTINFO */
5016 #if defined(HAVE_IPV6)
5017         case AF_INET6: {
5018 #if defined(IPV6_PKTINFO) && defined(HAVE_STRUCT_IN6_PKTINFO)
5019                 struct sockaddr_in6 *sin6;
5020                 struct in6_pktinfo pkt6;
5021
5022                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in6)) {
5023                         sin6 = &si->bindname.sa.in6;
5024                 } else {
5025                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in6)) {
5026                                 return 0;
5027                         }
5028                         sin6 = &si->myname.sa.in6;
5029                 }
5030
5031                 ZERO_STRUCT(pkt6);
5032
5033                 pkt6.ipi6_ifindex = socket_wrapper_default_iface();
5034                 pkt6.ipi6_addr = sin6->sin6_addr;
5035
5036                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IPV6, IPV6_PKTINFO,
5037                                         &pkt6, sizeof(pkt6));
5038 #endif /* HAVE_STRUCT_IN6_PKTINFO */
5039
5040                 break;
5041         }
5042 #endif /* IPV6_PKTINFO */
5043         default:
5044                 return -1;
5045         }
5046
5047         return 0;
5048 }
5049
5050 static int swrap_msghdr_add_socket_info(struct socket_info *si,
5051                                         struct msghdr *omsg)
5052 {
5053         int rc = 0;
5054
5055         if (si->pktinfo > 0) {
5056                 rc = swrap_msghdr_add_pktinfo(si, omsg);
5057         }
5058
5059         return rc;
5060 }
5061
/*
 * Forward declarations of the per-cmsg handlers dispatched by
 * swrap_sendmsg_filter_cmsghdr(). They all share one contract:
 * a cmsg that is kept gets appended to *cm_data and *cm_data_space
 * grows accordingly; 0 means success, a negative value failure.
 */
static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
                                   uint8_t **cm_data,
                                   size_t *cm_data_space);
static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
                                                uint8_t **cm_data,
                                                size_t *cm_data_space);
static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
                                                uint8_t **cm_data,
                                                size_t *cm_data_space);
5071
5072 static int swrap_sendmsg_filter_cmsghdr(const struct msghdr *_msg,
5073                                         uint8_t **cm_data,
5074                                         size_t *cm_data_space)
5075 {
5076         struct msghdr *msg = discard_const_p(struct msghdr, _msg);
5077         struct cmsghdr *cmsg;
5078         int rc = -1;
5079
5080         /* Nothing to do */
5081         if (msg->msg_controllen == 0 || msg->msg_control == NULL) {
5082                 return 0;
5083         }
5084
5085         for (cmsg = CMSG_FIRSTHDR(msg);
5086              cmsg != NULL;
5087              cmsg = CMSG_NXTHDR(msg, cmsg)) {
5088                 switch (cmsg->cmsg_level) {
5089                 case IPPROTO_IP:
5090                         rc = swrap_sendmsg_filter_cmsg_ipproto_ip(cmsg,
5091                                                                   cm_data,
5092                                                                   cm_data_space);
5093                         break;
5094                 case SOL_SOCKET:
5095                         rc = swrap_sendmsg_filter_cmsg_sol_socket(cmsg,
5096                                                                   cm_data,
5097                                                                   cm_data_space);
5098                         break;
5099                 default:
5100                         rc = swrap_sendmsg_copy_cmsg(cmsg,
5101                                                      cm_data,
5102                                                      cm_data_space);
5103                         break;
5104                 }
5105                 if (rc < 0) {
5106                         int saved_errno = errno;
5107                         SAFE_FREE(*cm_data);
5108                         *cm_data_space = 0;
5109                         errno = saved_errno;
5110                         return rc;
5111                 }
5112         }
5113
5114         return rc;
5115 }
5116
/*
 * Append an unmodified copy of cmsg to the filtered control data.
 *
 * *cm_data is grown with realloc() by CMSG_ALIGN(cmsg->cmsg_len) bytes
 * and the cmsg is copied to the end of the previous content. The added
 * region is zeroed first, so the alignment padding behind the cmsg
 * never contains stale heap bytes.
 *
 * @param[in]     cmsg           The control message to copy.
 * @param[in,out] cm_data        The (re)allocated filtered control data.
 * @param[in,out] cm_data_space  The number of bytes used in *cm_data.
 *
 * @return 0 on success, -1 if realloc() failed; *cm_data and
 *         *cm_data_space are left untouched in that case.
 */
static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
                                   uint8_t **cm_data,
                                   size_t *cm_data_space)
{
        size_t add_space = CMSG_ALIGN(cmsg->cmsg_len);
        size_t cmspace = *cm_data_space + add_space;
        uint8_t *p;

        p = realloc((*cm_data), cmspace);
        if (p == NULL) {
                return -1;
        }
        (*cm_data) = p;

        p = (*cm_data) + (*cm_data_space);
        *cm_data_space = cmspace;

        /* realloc() does not initialize the padding after cmsg_len. */
        memset(p, 0, add_space);
        memcpy(p, cmsg, cmsg->cmsg_len);

        return 0;
}
5139
/* Forward declaration, used by swrap_sendmsg_filter_cmsg_ipproto_ip(). */
static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
                                            uint8_t **cm_data,
                                            size_t *cm_data_space);
5143
5144
/*
 * Filter a control message of level IPPROTO_IP.
 *
 * IP_PKTINFO and IPV6_PKTINFO (where defined) are handed to
 * swrap_sendmsg_filter_cmsg_pktinfo(). Every other type is rejected.
 *
 * @return The result of the pktinfo filter, or -1 with errno set to
 *         EINVAL for a cmsg type that is not handled here, so that the
 *         caller does not report a stale errno.
 */
static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
                                                uint8_t **cm_data,
                                                size_t *cm_data_space)
{
        int rc = -1;

        switch(cmsg->cmsg_type) {
#ifdef IP_PKTINFO
        case IP_PKTINFO:
                rc = swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
                                                       cm_data,
                                                       cm_data_space);
                break;
#endif
#ifdef IPV6_PKTINFO
        case IPV6_PKTINFO:
                rc = swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
                                                       cm_data,
                                                       cm_data_space);
                break;
#endif
        default:
                /* Unsupported IPPROTO_IP cmsg type. */
                errno = EINVAL;
                break;
        }

        return rc;
}
5172
/*
 * Filter for packet-info control messages: the cmsg is dropped.
 *
 * Passing a IP pktinfo to a unix socket might be rejected by the
 * Kernel, at least on FreeBSD, so nothing is appended to the filtered
 * control data and success is reported.
 *
 * @return Always 0.
 */
static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
                                             uint8_t **cm_data,
                                             size_t *cm_data_space)
{
        /* None of the arguments is needed to skip a cmsg. */
        (void)cm_data_space;
        (void)cm_data;
        (void)cmsg;

        return 0;
}
5187
5188 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5189                                                 uint8_t **cm_data,
5190                                                 size_t *cm_data_space)
5191 {
5192         int rc = -1;
5193
5194         switch (cmsg->cmsg_type) {
5195         case SCM_RIGHTS:
5196                 SWRAP_LOG(SWRAP_LOG_TRACE,
5197                           "Ignoring SCM_RIGHTS on inet socket!");
5198                 rc = 0;
5199                 break;
5200 #ifdef SCM_CREDENTIALS
5201         case SCM_CREDENTIALS:
5202                 SWRAP_LOG(SWRAP_LOG_TRACE,
5203                           "Ignoring SCM_CREDENTIALS on inet socket!");
5204                 rc = 0;
5205                 break;
5206 #endif /* SCM_CREDENTIALS */
5207         default:
5208                 rc = swrap_sendmsg_copy_cmsg(cmsg,
5209                                              cm_data,
5210                                              cm_data_space);
5211                 break;
5212         }
5213
5214         return rc;
5215 }
5216
/*
 * Value stored in the magic field of struct swrap_unix_scm_rights
 * by swrap_sendmsg_unix_scm_rights().
 */
static const uint64_t swrap_unix_scm_right_magic = 0x8e0e13f27c42fc36;

/*
 * We only allow up to 6 fds at a time
 * as that's more than enough for Samba
 * and it means we can keep the logic simple
 * and work with fixed size arrays.
 *
 * We also keep sizeof(struct swrap_unix_scm_rights)
 * under PIPE_BUF (4096) in order to allow a non-blocking
 * write into the pipe.
 */
#ifndef PIPE_BUF
#define PIPE_BUF 4096
#endif
#define SWRAP_MAX_PASSED_FDS ((size_t)6)
#define SWRAP_MAX_PASSED_SOCKET_INFO SWRAP_MAX_PASSED_FDS
/*
 * Description of the sockets that travel with one SCM_RIGHTS message.
 * The entries are per passed fd, in the order of the fds in the cmsg.
 */
struct swrap_unix_scm_rights_payload {
        /* Number of entries used in idxs[]. */
        uint8_t num_idxs;
        /*
         * Per passed fd: -1 if the fd is not a socket_wrapper socket,
         * otherwise the index of its entry in infos[]. Fds referring to
         * the same socket_info share one index.
         */
        int8_t idxs[SWRAP_MAX_PASSED_FDS];
        /* Copies of the passed socket_info structures (fd_passed = 0). */
        struct socket_info infos[SWRAP_MAX_PASSED_SOCKET_INFO];
};
/*
 * The complete record written into the pipe that accompanies the
 * passed fds: a header identifying the writer followed by the payload.
 */
struct swrap_unix_scm_rights {
        /* Set to swrap_unix_scm_right_magic. */
        uint64_t magic;
        /* SOCKET_WRAPPER_PACKAGE including the terminating NUL. */
        char package_name[sizeof(SOCKET_WRAPPER_PACKAGE)];
        /* SOCKET_WRAPPER_VERSION including the terminating NUL. */
        char package_version[sizeof(SOCKET_WRAPPER_VERSION)];
        /* sizeof(struct swrap_unix_scm_rights) as seen by the writer. */
        uint32_t full_size;
        /* sizeof(struct swrap_unix_scm_rights_payload) as seen by the writer. */
        uint32_t payload_size;
        struct swrap_unix_scm_rights_payload payload;
};
5247
5248 static void swrap_dec_fd_passed_array(size_t num, struct socket_info **array)
5249 {
5250         int saved_errno = errno;
5251         size_t i;
5252
5253         for (i = 0; i < num; i++) {
5254                 struct socket_info *si = array[i];
5255                 if (si == NULL) {
5256                         continue;
5257                 }
5258
5259                 SWRAP_LOCK_SI(si);
5260                 swrap_dec_refcount(si);
5261                 if (si->fd_passed > 0) {
5262                         si->fd_passed -= 1;
5263                 }
5264                 SWRAP_UNLOCK_SI(si);
5265                 array[i] = NULL;
5266         }
5267
5268         errno = saved_errno;
5269 }
5270
5271 static void swrap_undo_si_idx_array(size_t num, int *array)
5272 {
5273         int saved_errno = errno;
5274         size_t i;
5275
5276         swrap_mutex_lock(&first_free_mutex);
5277
5278         for (i = 0; i < num; i++) {
5279                 struct socket_info *si = NULL;
5280
5281                 if (array[i] == -1) {
5282                         continue;
5283                 }
5284
5285                 si = swrap_get_socket_info(array[i]);
5286                 if (si == NULL) {
5287                         continue;
5288                 }
5289
5290                 SWRAP_LOCK_SI(si);
5291                 swrap_dec_refcount(si);
5292                 SWRAP_UNLOCK_SI(si);
5293
5294                 swrap_set_next_free(si, first_free);
5295                 first_free = array[i];
5296                 array[i] = -1;
5297         }
5298
5299         swrap_mutex_unlock(&first_free_mutex);
5300         errno = saved_errno;
5301 }
5302
/*
 * Close every descriptor in array with libc_close(), skipping entries
 * that are -1. The result of libc_close() is ignored and errno is
 * preserved.
 */
static void swrap_close_fd_array(size_t num, const int *array)
{
        const int errno_backup = errno;
        size_t pos = 0;

        while (pos < num) {
                const int fd = array[pos];

                if (fd != -1) {
                        libc_close(fd);
                }
                pos++;
        }

        errno = errno_backup;
}
5317
/*
 * Conversion helper: store a byte pointer (e.g. the result of
 * CMSG_DATA()) in p and read it back through fds as a pointer to the
 * fd array, without a pointer cast.
 *
 * NOTE(review): identifiers starting with "__" are reserved for the
 * implementation; renaming would need a file-wide change.
 */
union __swrap_fds {
        const uint8_t *p;
        int *fds;
};

/*
 * Conversion helper: store a byte pointer into a control buffer in p
 * and read it back through cmsg as a struct cmsghdr pointer, without a
 * pointer cast.
 */
union __swrap_cmsghdr {
        const uint8_t *p;
        struct cmsghdr *cmsg;
};
5327
5328 static int swrap_sendmsg_unix_scm_rights(struct cmsghdr *cmsg,
5329                                          uint8_t **cm_data,
5330                                          size_t *cm_data_space,
5331                                          int *scm_rights_pipe_fd)
5332 {
5333         struct swrap_unix_scm_rights info;
5334         struct swrap_unix_scm_rights_payload *payload = NULL;
5335         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5336         struct socket_info *si_array[SWRAP_MAX_PASSED_FDS] = { NULL, };
5337         size_t info_idx = 0;
5338         size_t size_fds_in;
5339         size_t num_fds_in;
5340         union __swrap_fds __fds_in = { .p = NULL, };
5341         const int *fds_in = NULL;
5342         size_t num_fds_out;
5343         size_t size_fds_out;
5344         union __swrap_fds __fds_out = { .p = NULL, };
5345         int *fds_out = NULL;
5346         size_t cmsg_len;
5347         size_t cmsg_space;
5348         size_t new_cm_data_space;
5349         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5350         struct cmsghdr *new_cmsg = NULL;
5351         uint8_t *p = NULL;
5352         size_t i;
5353         int pipefd[2] = { -1, -1 };
5354         int rc;
5355         ssize_t sret;
5356
5357         /*
5358          * We pass this a buffer to the kernel make sure any padding
5359          * is also cleared.
5360          */
5361         ZERO_STRUCT(info);
5362         info.magic = swrap_unix_scm_right_magic;
5363         memcpy(info.package_name,
5364                SOCKET_WRAPPER_PACKAGE,
5365                sizeof(info.package_name));
5366         memcpy(info.package_version,
5367                SOCKET_WRAPPER_VERSION,
5368                sizeof(info.package_version));
5369         info.full_size = sizeof(info);
5370         info.payload_size = sizeof(info.payload);
5371         payload = &info.payload;
5372
5373         if (*scm_rights_pipe_fd != -1) {
5374                 SWRAP_LOG(SWRAP_LOG_ERROR,
5375                           "Two SCM_RIGHTS headers are not supported by socket_wrapper");
5376                 errno = EINVAL;
5377                 return -1;
5378         }
5379
5380         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5381                 SWRAP_LOG(SWRAP_LOG_ERROR,
5382                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5383                           (size_t)cmsg->cmsg_len,
5384                           CMSG_LEN(0));
5385                 errno = EINVAL;
5386                 return -1;
5387         }
5388         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5389         if ((size_fds_in % sizeof(int)) != 0) {
5390                 SWRAP_LOG(SWRAP_LOG_ERROR,
5391                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5392                           (size_t)cmsg->cmsg_len,
5393                           size_fds_in,
5394                           sizeof(int));
5395                 errno = EINVAL;
5396                 return -1;
5397         }
5398         num_fds_in = size_fds_in / sizeof(int);
5399         if (num_fds_in > SWRAP_MAX_PASSED_FDS) {
5400                 SWRAP_LOG(SWRAP_LOG_ERROR,
5401                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5402                           "num_fds_in=%zu > "
5403                           "SWRAP_MAX_PASSED_FDS(%zu)",
5404                           (size_t)cmsg->cmsg_len,
5405                           size_fds_in,
5406                           num_fds_in,
5407                           SWRAP_MAX_PASSED_FDS);
5408                 errno = EINVAL;
5409                 return -1;
5410         }
5411         if (num_fds_in == 0) {
5412                 SWRAP_LOG(SWRAP_LOG_ERROR,
5413                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5414                           "num_fds_in=%zu",
5415                           (size_t)cmsg->cmsg_len,
5416                           size_fds_in,
5417                           num_fds_in);
5418                 errno = EINVAL;
5419                 return -1;
5420         }
5421         __fds_in.p = CMSG_DATA(cmsg);
5422         fds_in = __fds_in.fds;
5423         num_fds_out = num_fds_in + 1;
5424
5425         SWRAP_LOG(SWRAP_LOG_TRACE,
5426                   "num_fds_in=%zu num_fds_out=%zu",
5427                   num_fds_in, num_fds_out);
5428
5429         size_fds_out = sizeof(int) * num_fds_out;
5430         cmsg_len = CMSG_LEN(size_fds_out);
5431         cmsg_space = CMSG_SPACE(size_fds_out);
5432
5433         new_cm_data_space = *cm_data_space + cmsg_space;
5434
5435         p = realloc((*cm_data), new_cm_data_space);
5436         if (p == NULL) {
5437                 return -1;
5438         }
5439         (*cm_data) = p;
5440         p = (*cm_data) + (*cm_data_space);
5441         memset(p, 0, cmsg_space);
5442         __new_cmsg.p = p;
5443         new_cmsg = __new_cmsg.cmsg;
5444         *new_cmsg = *cmsg;
5445         __fds_out.p = CMSG_DATA(new_cmsg);
5446         fds_out = __fds_out.fds;
5447         memcpy(fds_out, fds_in, size_fds_in);
5448         new_cmsg->cmsg_len = cmsg->cmsg_len;
5449
5450         for (i = 0; i < num_fds_in; i++) {
5451                 size_t j;
5452
5453                 payload->idxs[i] = -1;
5454                 payload->num_idxs++;
5455
5456                 si_idx_array[i] = find_socket_info_index(fds_in[i]);
5457                 if (si_idx_array[i] == -1) {
5458                         continue;
5459                 }
5460
5461                 si_array[i] = swrap_get_socket_info(si_idx_array[i]);
5462                 if (si_array[i] == NULL) {
5463                         SWRAP_LOG(SWRAP_LOG_ERROR,
5464                                   "fds_in[%zu]=%d si_idx_array[%zu]=%d missing!",
5465                                   i, fds_in[i], i, si_idx_array[i]);
5466                         errno = EINVAL;
5467                         return -1;
5468                 }
5469
5470                 for (j = 0; j < i; j++) {
5471                         if (si_array[j] == si_array[i]) {
5472                                 payload->idxs[i] = payload->idxs[j];
5473                                 break;
5474                         }
5475                 }
5476                 if (payload->idxs[i] == -1) {
5477                         if (info_idx >= SWRAP_MAX_PASSED_SOCKET_INFO) {
5478                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5479                                           "fds_in[%zu]=%d,si_idx_array[%zu]=%d: "
5480                                           "info_idx=%zu >= SWRAP_MAX_PASSED_FDS(%zu)!",
5481                                           i, fds_in[i], i, si_idx_array[i],
5482                                           info_idx,
5483                                           SWRAP_MAX_PASSED_SOCKET_INFO);
5484                                 errno = EINVAL;
5485                                 return -1;
5486                         }
5487                         payload->idxs[i] = info_idx;
5488                         info_idx += 1;
5489                         continue;
5490                 }
5491         }
5492
5493         for (i = 0; i < num_fds_in; i++) {
5494                 struct socket_info *si = si_array[i];
5495
5496                 if (si == NULL) {
5497                         SWRAP_LOG(SWRAP_LOG_TRACE,
5498                                   "fds_in[%zu]=%d not an inet socket",
5499                                   i, fds_in[i]);
5500                         continue;
5501                 }
5502
5503                 SWRAP_LOG(SWRAP_LOG_TRACE,
5504                           "fds_in[%zu]=%d si_idx_array[%zu]=%d "
5505                           "passing as info.idxs[%zu]=%d!",
5506                           i, fds_in[i],
5507                           i, si_idx_array[i],
5508                           i, payload->idxs[i]);
5509
5510                 SWRAP_LOCK_SI(si);
5511                 si->fd_passed += 1;
5512                 payload->infos[payload->idxs[i]] = *si;
5513                 payload->infos[payload->idxs[i]].fd_passed = 0;
5514                 SWRAP_UNLOCK_SI(si);
5515         }
5516
5517         rc = pipe(pipefd);
5518         if (rc == -1) {
5519                 int saved_errno = errno;
5520                 SWRAP_LOG(SWRAP_LOG_ERROR,
5521                           "pipe() failed - %d %s",
5522                           saved_errno,
5523                           strerror(saved_errno));
5524                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5525                 errno = saved_errno;
5526                 return -1;
5527         }
5528
5529         sret = libc_write(pipefd[1], &info, sizeof(info));
5530         if (sret != sizeof(info)) {
5531                 int saved_errno = errno;
5532                 if (sret != -1) {
5533                         saved_errno = EINVAL;
5534                 }
5535                 SWRAP_LOG(SWRAP_LOG_ERROR,
5536                           "write() failed - sret=%zd - %d %s",
5537                           sret, saved_errno,
5538                           strerror(saved_errno));
5539                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5540                 libc_close(pipefd[1]);
5541                 libc_close(pipefd[0]);
5542                 errno = saved_errno;
5543                 return -1;
5544         }
5545         libc_close(pipefd[1]);
5546
5547         /*
5548          * Add the pipe read end to the end of the passed fd array
5549          */
5550         fds_out[num_fds_in] = pipefd[0];
5551         new_cmsg->cmsg_len = cmsg_len;
5552
5553         /* we're done ... */
5554         *scm_rights_pipe_fd = pipefd[0];
5555         *cm_data_space = new_cm_data_space;
5556
5557         return 0;
5558 }
5559
/*
 * Handle a single SOL_SOCKET control message on the send path of a
 * unix socket.
 *
 * SCM_RIGHTS messages are handed to swrap_sendmsg_unix_scm_rights(),
 * every other type is passed on to swrap_sendmsg_copy_cmsg().
 *
 * Returns whatever the selected helper returns.
 */
static int swrap_sendmsg_unix_sol_socket(struct cmsghdr *cmsg,
                                         uint8_t **cm_data,
                                         size_t *cm_data_space,
                                         int *scm_rights_pipe_fd)
{
        if (cmsg->cmsg_type == SCM_RIGHTS) {
                return swrap_sendmsg_unix_scm_rights(cmsg,
                                                     cm_data,
                                                     cm_data_space,
                                                     scm_rights_pipe_fd);
        }

        /* Anything that is not fd passing is forwarded as is. */
        return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5583
5584 static int swrap_recvmsg_unix_scm_rights(struct cmsghdr *cmsg,
5585                                          uint8_t **cm_data,
5586                                          size_t *cm_data_space)
5587 {
5588         int scm_rights_pipe_fd = -1;
5589         struct swrap_unix_scm_rights info;
5590         struct swrap_unix_scm_rights_payload *payload = NULL;
5591         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5592         size_t size_fds_in;
5593         size_t num_fds_in;
5594         union __swrap_fds __fds_in = { .p = NULL, };
5595         const int *fds_in = NULL;
5596         size_t num_fds_out;
5597         size_t size_fds_out;
5598         union __swrap_fds __fds_out = { .p = NULL, };
5599         int *fds_out = NULL;
5600         size_t cmsg_len;
5601         size_t cmsg_space;
5602         size_t new_cm_data_space;
5603         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5604         struct cmsghdr *new_cmsg = NULL;
5605         uint8_t *p = NULL;
5606         ssize_t sret;
5607         size_t i;
5608         int cmp;
5609
5610         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5611                 SWRAP_LOG(SWRAP_LOG_ERROR,
5612                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5613                           (size_t)cmsg->cmsg_len,
5614                           CMSG_LEN(0));
5615                 errno = EINVAL;
5616                 return -1;
5617         }
5618         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5619         if ((size_fds_in % sizeof(int)) != 0) {
5620                 SWRAP_LOG(SWRAP_LOG_ERROR,
5621                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5622                           (size_t)cmsg->cmsg_len,
5623                           size_fds_in,
5624                           sizeof(int));
5625                 errno = EINVAL;
5626                 return -1;
5627         }
5628         num_fds_in = size_fds_in / sizeof(int);
5629         if (num_fds_in > (SWRAP_MAX_PASSED_FDS + 1)) {
5630                 SWRAP_LOG(SWRAP_LOG_ERROR,
5631                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5632                           "num_fds_in=%zu > SWRAP_MAX_PASSED_FDS+1(%zu)",
5633                           (size_t)cmsg->cmsg_len,
5634                           size_fds_in,
5635                           num_fds_in,
5636                           SWRAP_MAX_PASSED_FDS+1);
5637                 errno = EINVAL;
5638                 return -1;
5639         }
5640         if (num_fds_in <= 1) {
5641                 SWRAP_LOG(SWRAP_LOG_ERROR,
5642                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5643                           "num_fds_in=%zu",
5644                           (size_t)cmsg->cmsg_len,
5645                           size_fds_in,
5646                           num_fds_in);
5647                 errno = EINVAL;
5648                 return -1;
5649         }
5650         __fds_in.p = CMSG_DATA(cmsg);
5651         fds_in = __fds_in.fds;
5652         num_fds_out = num_fds_in - 1;
5653
5654         SWRAP_LOG(SWRAP_LOG_TRACE,
5655                   "num_fds_in=%zu num_fds_out=%zu",
5656                   num_fds_in, num_fds_out);
5657
5658         for (i = 0; i < num_fds_in; i++) {
5659                 /* Check if we have a stale fd and remove it */
5660                 swrap_remove_stale(fds_in[i]);
5661         }
5662
5663         scm_rights_pipe_fd = fds_in[num_fds_out];
5664         size_fds_out = sizeof(int) * num_fds_out;
5665         cmsg_len = CMSG_LEN(size_fds_out);
5666         cmsg_space = CMSG_SPACE(size_fds_out);
5667
5668         new_cm_data_space = *cm_data_space + cmsg_space;
5669
5670         p = realloc((*cm_data), new_cm_data_space);
5671         if (p == NULL) {
5672                 swrap_close_fd_array(num_fds_in, fds_in);
5673                 return -1;
5674         }
5675         (*cm_data) = p;
5676         p = (*cm_data) + (*cm_data_space);
5677         memset(p, 0, cmsg_space);
5678         __new_cmsg.p = p;
5679         new_cmsg = __new_cmsg.cmsg;
5680         *new_cmsg = *cmsg;
5681         __fds_out.p = CMSG_DATA(new_cmsg);
5682         fds_out = __fds_out.fds;
5683         memcpy(fds_out, fds_in, size_fds_out);
5684         new_cmsg->cmsg_len = cmsg_len;
5685
5686         sret = read(scm_rights_pipe_fd, &info, sizeof(info));
5687         if (sret != sizeof(info)) {
5688                 int saved_errno = errno;
5689                 if (sret != -1) {
5690                         saved_errno = EINVAL;
5691                 }
5692                 SWRAP_LOG(SWRAP_LOG_ERROR,
5693                           "read() failed - sret=%zd - %d %s",
5694                           sret, saved_errno,
5695                           strerror(saved_errno));
5696                 swrap_close_fd_array(num_fds_in, fds_in);
5697                 errno = saved_errno;
5698                 return -1;
5699         }
5700         libc_close(scm_rights_pipe_fd);
5701         payload = &info.payload;
5702
5703         if (info.magic != swrap_unix_scm_right_magic) {
5704                 SWRAP_LOG(SWRAP_LOG_ERROR,
5705                           "info.magic=0x%llx != swrap_unix_scm_right_magic=0x%llx",
5706                           (unsigned long long)info.magic,
5707                           (unsigned long long)swrap_unix_scm_right_magic);
5708                 swrap_close_fd_array(num_fds_out, fds_out);
5709                 errno = EINVAL;
5710                 return -1;
5711         }
5712
5713         cmp = memcmp(info.package_name,
5714                      SOCKET_WRAPPER_PACKAGE,
5715                      sizeof(info.package_name));
5716         if (cmp != 0) {
5717                 SWRAP_LOG(SWRAP_LOG_ERROR,
5718                           "info.package_name='%.*s' != '%s'",
5719                           (int)sizeof(info.package_name),
5720                           info.package_name,
5721                           SOCKET_WRAPPER_PACKAGE);
5722                 swrap_close_fd_array(num_fds_out, fds_out);
5723                 errno = EINVAL;
5724                 return -1;
5725         }
5726
5727         cmp = memcmp(info.package_version,
5728                      SOCKET_WRAPPER_VERSION,
5729                      sizeof(info.package_version));
5730         if (cmp != 0) {
5731                 SWRAP_LOG(SWRAP_LOG_ERROR,
5732                           "info.package_version='%.*s' != '%s'",
5733                           (int)sizeof(info.package_version),
5734                           info.package_version,
5735                           SOCKET_WRAPPER_VERSION);
5736                 swrap_close_fd_array(num_fds_out, fds_out);
5737                 errno = EINVAL;
5738                 return -1;
5739         }
5740
5741         if (info.full_size != sizeof(info)) {
5742                 SWRAP_LOG(SWRAP_LOG_ERROR,
5743                           "info.full_size=%zu != sizeof(info)=%zu",
5744                           (size_t)info.full_size,
5745                           sizeof(info));
5746                 swrap_close_fd_array(num_fds_out, fds_out);
5747                 errno = EINVAL;
5748                 return -1;
5749         }
5750
5751         if (info.payload_size != sizeof(info.payload)) {
5752                 SWRAP_LOG(SWRAP_LOG_ERROR,
5753                           "info.payload_size=%zu != sizeof(info.payload)=%zu",
5754                           (size_t)info.payload_size,
5755                           sizeof(info.payload));
5756                 swrap_close_fd_array(num_fds_out, fds_out);
5757                 errno = EINVAL;
5758                 return -1;
5759         }
5760
5761         if (payload->num_idxs != num_fds_out) {
5762                 SWRAP_LOG(SWRAP_LOG_ERROR,
5763                           "info.num_idxs=%u != num_fds_out=%zu",
5764                           payload->num_idxs, num_fds_out);
5765                 swrap_close_fd_array(num_fds_out, fds_out);
5766                 errno = EINVAL;
5767                 return -1;
5768         }
5769
5770         for (i = 0; i < num_fds_out; i++) {
5771                 size_t j;
5772
5773                 si_idx_array[i] = -1;
5774
5775                 if (payload->idxs[i] == -1) {
5776                         SWRAP_LOG(SWRAP_LOG_TRACE,
5777                                   "fds_out[%zu]=%d not an inet socket",
5778                                   i, fds_out[i]);
5779                         continue;
5780                 }
5781
5782                 if (payload->idxs[i] < 0) {
5783                         SWRAP_LOG(SWRAP_LOG_ERROR,
5784                                   "fds_out[%zu]=%d info.idxs[%zu]=%d < 0!",
5785                                   i, fds_out[i], i, payload->idxs[i]);
5786                         swrap_close_fd_array(num_fds_out, fds_out);
5787                         errno = EINVAL;
5788                         return -1;
5789                 }
5790
5791                 if (payload->idxs[i] >= payload->num_idxs) {
5792                         SWRAP_LOG(SWRAP_LOG_ERROR,
5793                                   "fds_out[%zu]=%d info.idxs[%zu]=%d >= %u!",
5794                                   i, fds_out[i], i, payload->idxs[i],
5795                                   payload->num_idxs);
5796                         swrap_close_fd_array(num_fds_out, fds_out);
5797                         errno = EINVAL;
5798                         return -1;
5799                 }
5800
5801                 if ((size_t)fds_out[i] >= socket_fds_max) {
5802                         SWRAP_LOG(SWRAP_LOG_ERROR,
5803                                   "The max socket index limit of %zu has been reached, "
5804                                   "trying to add %d",
5805                                   socket_fds_max,
5806                                   fds_out[i]);
5807                         swrap_close_fd_array(num_fds_out, fds_out);
5808                         errno = EMFILE;
5809                         return -1;
5810                 }
5811
5812                 SWRAP_LOG(SWRAP_LOG_TRACE,
5813                           "fds_in[%zu]=%d "
5814                           "received as info.idxs[%zu]=%d!",
5815                           i, fds_out[i],
5816                           i, payload->idxs[i]);
5817
5818                 for (j = 0; j < i; j++) {
5819                         if (payload->idxs[j] == -1) {
5820                                 continue;
5821                         }
5822                         if (payload->idxs[j] == payload->idxs[i]) {
5823                                 si_idx_array[i] = si_idx_array[j];
5824                         }
5825                 }
5826                 if (si_idx_array[i] == -1) {
5827                         const struct socket_info *si = &payload->infos[payload->idxs[i]];
5828
5829                         si_idx_array[i] = swrap_add_socket_info(si);
5830                         if (si_idx_array[i] == -1) {
5831                                 int saved_errno = errno;
5832                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5833                                           "The max socket index limit of %zu has been reached, "
5834                                           "trying to add %d",
5835                                           socket_fds_max,
5836                                           fds_out[i]);
5837                                 swrap_undo_si_idx_array(i, si_idx_array);
5838                                 swrap_close_fd_array(num_fds_out, fds_out);
5839                                 errno = saved_errno;
5840                                 return -1;
5841                         }
5842                         SWRAP_LOG(SWRAP_LOG_TRACE,
5843                                   "Imported %s socket for protocol %s, fd=%d",
5844                                   si->family == AF_INET ? "IPv4" : "IPv6",
5845                                   si->type == SOCK_DGRAM ? "UDP" : "TCP",
5846                                   fds_out[i]);
5847                 }
5848         }
5849
5850         for (i = 0; i < num_fds_out; i++) {
5851                 if (si_idx_array[i] == -1) {
5852                         continue;
5853                 }
5854                 set_socket_info_index(fds_out[i], si_idx_array[i]);
5855         }
5856
5857         /* we're done ... */
5858         *cm_data_space = new_cm_data_space;
5859
5860         return 0;
5861 }
5862
/*
 * Handle a single SOL_SOCKET control message on the receive path of a
 * unix socket.
 *
 * SCM_RIGHTS messages are handed to swrap_recvmsg_unix_scm_rights(),
 * every other type is passed on to swrap_sendmsg_copy_cmsg().
 *
 * Returns whatever the selected helper returns.
 */
static int swrap_recvmsg_unix_sol_socket(struct cmsghdr *cmsg,
                                         uint8_t **cm_data,
                                         size_t *cm_data_space)
{
        if (cmsg->cmsg_type == SCM_RIGHTS) {
                return swrap_recvmsg_unix_scm_rights(cmsg,
                                                     cm_data,
                                                     cm_data_space);
        }

        /* Anything that is not fd passing is forwarded as is. */
        return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5884
5885 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
5886
/*
 * Prepare a sendmsg() call on a unix socket that is not wrapped.
 *
 * *msg_tmp is initialized as a copy of *_msg_in. If the message
 * carries control data, every control message is processed (SOL_SOCKET
 * ones by swrap_sendmsg_unix_sol_socket(), all others by
 * swrap_sendmsg_copy_cmsg()) into a newly allocated buffer which is
 * then installed as msg_tmp->msg_control.
 *
 * _msg_in            - the message given by the caller (not modified)
 * msg_tmp            - out: the message to pass to the real sendmsg()
 * scm_rights_pipe_fd - out: set to -1, the helpers may store a pipe fd
 *                      here that has to be closed after sending
 *
 * On success 0 is returned and msg_tmp->msg_control is either NULL or
 * heap memory owned by msg_tmp, so swrap_sendmsg_after_unix() can
 * release it. The one exception is a NULL control pointer combined
 * with a non-zero length, which is passed through unchanged.
 *
 * On failure the (negative) return value of the failing helper is
 * returned with errno preserved. In that case no buffer and no pipe fd
 * are left behind and msg_tmp carries no control data.
 */
static int swrap_sendmsg_before_unix(const struct msghdr *_msg_in,
                                     struct msghdr *msg_tmp,
                                     int *scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        struct msghdr *msg_in = discard_const_p(struct msghdr, _msg_in);
        struct cmsghdr *cmsg = NULL;
        uint8_t *cm_data = NULL;
        size_t cm_data_space = 0;
        int rc = -1;

        *msg_tmp = *msg_in;
        *scm_rights_pipe_fd = -1;

        /* Nothing to do */
        if (msg_in->msg_controllen == 0 || msg_in->msg_control == NULL) {
                if (msg_in->msg_controllen == 0) {
                        /*
                         * Do not keep the caller's control buffer in
                         * msg_tmp: swrap_sendmsg_after_unix() frees
                         * msg_tmp->msg_control and must never see
                         * memory we do not own. With a length of 0
                         * the pointer carries no information anyway.
                         */
                        msg_tmp->msg_control = NULL;
                }
                return 0;
        }

        for (cmsg = CMSG_FIRSTHDR(msg_in);
             cmsg != NULL;
             cmsg = CMSG_NXTHDR(msg_in, cmsg)) {
                switch (cmsg->cmsg_level) {
                case SOL_SOCKET:
                        rc = swrap_sendmsg_unix_sol_socket(cmsg,
                                                           &cm_data,
                                                           &cm_data_space,
                                                           scm_rights_pipe_fd);
                        break;

                default:
                        rc = swrap_sendmsg_copy_cmsg(cmsg,
                                                     &cm_data,
                                                     &cm_data_space);
                        break;
                }
                if (rc < 0) {
                        int saved_errno = errno;
                        SAFE_FREE(cm_data);
                        /*
                         * An earlier control message may already have
                         * created the pipe, don't leak it.
                         */
                        if (*scm_rights_pipe_fd != -1) {
                                libc_close(*scm_rights_pipe_fd);
                                *scm_rights_pipe_fd = -1;
                        }
                        /* msg_tmp must not alias the caller's buffer */
                        msg_tmp->msg_control = NULL;
                        msg_tmp->msg_controllen = 0;
                        errno = saved_errno;
                        return rc;
                }
        }

        msg_tmp->msg_controllen = cm_data_space;
        msg_tmp->msg_control = cm_data;

        return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        *msg_tmp = *_msg_in;
        /* Keep the output defined, same as with control message support */
        *scm_rights_pipe_fd = -1;
        return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
5940
/*
 * Clean up after a sendmsg() call on a unix socket.
 *
 * With control message support compiled in, msg_tmp->msg_control is
 * released and scm_rights_pipe_fd is closed unless it is -1. errno is
 * left exactly as it was on entry.
 *
 * Returns ret unchanged.
 */
static ssize_t swrap_sendmsg_after_unix(struct msghdr *msg_tmp,
                                        ssize_t ret,
                                        int scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        const int errno_backup = errno;
        const int have_pipe_fd = (scm_rights_pipe_fd != -1);

        SAFE_FREE(msg_tmp->msg_control);

        if (have_pipe_fd) {
                libc_close(scm_rights_pipe_fd);
        }

        /* The cleanup must not hide the result of the send call. */
        errno = errno_backup;
#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        return ret;
}
5955
/*
 * Prepare a recvmsg() call on a unix socket.
 *
 * *msg_tmp always starts as a copy of *msg_in and *tmp_control as
 * NULL. If the caller supplied a control buffer, a zeroed temporary
 * buffer is allocated and installed in msg_tmp instead. It is
 * CMSG_SPACE(sizeof(int)) bytes larger than the caller's buffer, unless
 * that would reach INT32_MAX. The allocation is also returned via
 * *tmp_control.
 *
 * Returns 0 on success and -1 if the allocation failed.
 */
static int swrap_recvmsg_before_unix(struct msghdr *msg_in,
                                     struct msghdr *msg_tmp,
                                     uint8_t **tmp_control)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        const size_t pipe_fd_space = CMSG_SPACE(sizeof(int));
        size_t buf_len;
        uint8_t *buf = NULL;
#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */

        *msg_tmp = *msg_in;
        *tmp_control = NULL;

#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_in->msg_controllen=%zu",
                  (size_t)msg_in->msg_controllen);

        /* Without a control buffer the plain copy is all we need. */
        if (msg_in->msg_control == NULL || msg_in->msg_controllen == 0) {
                return 0;
        }

        /*
         * The kernel needs a bit of additional room to deliver
         * the pipe fd that swrap_sendmsg_before_unix() added.
         * swrap_recvmsg_after_unix() hides it from the caller again.
         */
        buf_len = msg_in->msg_controllen;
        if (buf_len < (INT32_MAX - pipe_fd_space)) {
                buf_len += pipe_fd_space;
        }

        buf = calloc(1, buf_len);
        if (buf == NULL) {
                return -1;
        }

        *tmp_control = buf;
        msg_tmp->msg_control = buf;
        msg_tmp->msg_controllen = buf_len;

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_tmp->msg_controllen=%zu",
                  (size_t)msg_tmp->msg_controllen);
#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */

        return 0;
}
6005
/*
 * Finish a recvmsg() call on a unix socket.
 *
 * msg_tmp     - the message that was passed to the real recvmsg()
 * tmp_control - the temporary control buffer that
 *               swrap_recvmsg_before_unix() allocated (or NULL);
 *               it is always released and set to NULL here
 * msg_out     - the message of the caller that receives the result
 * ret         - the return value of the real recvmsg()
 *
 * If ret is negative msg_out is left untouched and ret is returned.
 *
 * Otherwise the received control messages are processed (SOL_SOCKET
 * ones by swrap_recvmsg_unix_sol_socket(), all others by
 * swrap_sendmsg_copy_cmsg()) and the result is copied into the
 * caller's control buffer, limited to its size. All other fields of
 * msg_out are taken from msg_tmp.
 *
 * Returns ret on success or the (negative) return value of the failing
 * helper. errno is preserved across the cleanup.
 */
static ssize_t swrap_recvmsg_after_unix(struct msghdr *msg_tmp,
                                        uint8_t **tmp_control,
                                        struct msghdr *msg_out,
                                        ssize_t ret)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        struct cmsghdr *cmsg = NULL;
        uint8_t *cm_data = NULL;
        size_t cm_data_space = 0;
        int rc = -1;

        if (ret < 0) {
                int saved_errno = errno;
                SWRAP_LOG(SWRAP_LOG_TRACE, "ret=%zd - %d - %s", ret,
                          saved_errno, strerror(saved_errno));
                SAFE_FREE(*tmp_control);
                /* msg_out should not be touched on error */
                errno = saved_errno;
                return ret;
        }

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_tmp->msg_controllen=%zu",
                  (size_t)msg_tmp->msg_controllen);

        /* Nothing to do */
        if (msg_tmp->msg_controllen == 0 || msg_tmp->msg_control == NULL) {
                int saved_errno = errno;
                void *caller_control = msg_out->msg_control;

                *msg_out = *msg_tmp;
                if (*tmp_control != NULL) {
                        /*
                         * msg_tmp->msg_control is our temporary buffer
                         * which is released below. The caller has to
                         * keep its own buffer pointer, otherwise it
                         * would be left with a dangling pointer.
                         */
                        msg_out->msg_control = caller_control;
                }
                SAFE_FREE(*tmp_control);
                errno = saved_errno;
                return ret;
        }

        for (cmsg = CMSG_FIRSTHDR(msg_tmp);
             cmsg != NULL;
             cmsg = CMSG_NXTHDR(msg_tmp, cmsg)) {
                switch (cmsg->cmsg_level) {
                case SOL_SOCKET:
                        rc = swrap_recvmsg_unix_sol_socket(cmsg,
                                                           &cm_data,
                                                           &cm_data_space);
                        break;

                default:
                        rc = swrap_sendmsg_copy_cmsg(cmsg,
                                                     &cm_data,
                                                     &cm_data_space);
                        break;
                }
                if (rc < 0) {
                        int saved_errno = errno;
                        SAFE_FREE(cm_data);
                        SAFE_FREE(*tmp_control);
                        errno = saved_errno;
                        return rc;
                }
        }

        /*
         * msg_tmp->msg_control (*tmp_control) was created by
         * swrap_recvmsg_before_unix() and msg_out->msg_control
         * is still the buffer of the caller.
         */
        msg_tmp->msg_control = msg_out->msg_control;
        msg_tmp->msg_controllen = msg_out->msg_controllen;
        *msg_out = *msg_tmp;

        cm_data_space = MIN(cm_data_space, msg_out->msg_controllen);
        if (cm_data_space > 0) {
                /*
                 * cm_data stays NULL if no control message was
                 * processed, don't hand a NULL pointer to memcpy().
                 */
                memcpy(msg_out->msg_control, cm_data, cm_data_space);
        }
        msg_out->msg_controllen = cm_data_space;
        SAFE_FREE(cm_data);
        SAFE_FREE(*tmp_control);

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_out->msg_controllen=%zu",
                  (size_t)msg_out->msg_controllen);
        return ret;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        int saved_errno = errno;
        *msg_out = *msg_tmp;
        SAFE_FREE(*tmp_control);
        errno = saved_errno;
        return ret;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6092
6093 static ssize_t swrap_sendmsg_before(int fd,
6094                                     struct socket_info *si,
6095                                     struct msghdr *msg,
6096                                     struct iovec *tmp_iov,
6097                                     struct sockaddr_un *tmp_un,
6098                                     const struct sockaddr_un **to_un,
6099                                     const struct sockaddr **to,
6100                                     int *bcast)
6101 {
6102         size_t i, len = 0;
6103         ssize_t ret = -1;
6104
6105         if (to_un) {
6106                 *to_un = NULL;
6107         }
6108         if (to) {
6109                 *to = NULL;
6110         }
6111         if (bcast) {
6112                 *bcast = 0;
6113         }
6114
6115         SWRAP_LOCK_SI(si);
6116
6117         switch (si->type) {
6118         case SOCK_STREAM: {
6119                 unsigned long mtu;
6120
6121                 if (!si->connected) {
6122                         errno = ENOTCONN;
6123                         goto out;
6124                 }
6125
6126                 if (msg->msg_iovlen == 0) {
6127                         break;
6128                 }
6129
6130                 mtu = socket_wrapper_mtu();
6131                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6132                         size_t nlen;
6133                         nlen = len + msg->msg_iov[i].iov_len;
6134                         if (nlen < len) {
6135                                 /* overflow */
6136                                 errno = EMSGSIZE;
6137                                 goto out;
6138                         }
6139                         if (nlen > mtu) {
6140                                 break;
6141                         }
6142                 }
6143                 msg->msg_iovlen = i;
6144                 if (msg->msg_iovlen == 0) {
6145                         *tmp_iov = msg->msg_iov[0];
6146                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6147                                                (size_t)mtu);
6148                         msg->msg_iov = tmp_iov;
6149                         msg->msg_iovlen = 1;
6150                 }
6151                 break;
6152         }
6153         case SOCK_DGRAM:
6154                 if (si->connected) {
6155                         if (msg->msg_name != NULL) {
6156                                 /*
6157                                  * We are dealing with unix sockets and if we
6158                                  * are connected, we should only talk to the
6159                                  * connected unix path. Using the fd to send
6160                                  * to another server would be hard to achieve.
6161                                  */
6162                                 msg->msg_name = NULL;
6163                                 msg->msg_namelen = 0;
6164                         }
6165                 } else {
6166                         const struct sockaddr *msg_name;
6167                         msg_name = (const struct sockaddr *)msg->msg_name;
6168
6169                         if (msg_name == NULL) {
6170                                 errno = ENOTCONN;
6171                                 goto out;
6172                         }
6173
6174
6175                         ret = sockaddr_convert_to_un(si, msg_name, msg->msg_namelen,
6176                                                      tmp_un, 0, bcast);
6177                         if (ret == -1) {
6178                                 goto out;
6179                         }
6180
6181                         if (to_un) {
6182                                 *to_un = tmp_un;
6183                         }
6184                         if (to) {
6185                                 *to = msg_name;
6186                         }
6187                         msg->msg_name = tmp_un;
6188                         msg->msg_namelen = sizeof(*tmp_un);
6189                 }
6190
6191                 if (si->bound == 0) {
6192                         ret = swrap_auto_bind(fd, si, si->family);
6193                         if (ret == -1) {
6194                                 SWRAP_UNLOCK_SI(si);
6195                                 if (errno == ENOTSOCK) {
6196                                         swrap_remove_stale(fd);
6197                                         ret = -ENOTSOCK;
6198                                 } else {
6199                                         SWRAP_LOG(SWRAP_LOG_ERROR, "swrap_sendmsg_before failed");
6200                                 }
6201                                 return ret;
6202                         }
6203                 }
6204
6205                 if (!si->defer_connect) {
6206                         break;
6207                 }
6208
6209                 ret = sockaddr_convert_to_un(si,
6210                                              &si->peername.sa.s,
6211                                              si->peername.sa_socklen,
6212                                              tmp_un,
6213                                              0,
6214                                              NULL);
6215                 if (ret == -1) {
6216                         goto out;
6217                 }
6218
6219                 ret = libc_connect(fd,
6220                                    (struct sockaddr *)(void *)tmp_un,
6221                                    sizeof(*tmp_un));
6222
6223                 /* to give better errors */
6224                 if (ret == -1 && errno == ENOENT) {
6225                         errno = EHOSTUNREACH;
6226                 }
6227
6228                 if (ret == -1) {
6229                         goto out;
6230                 }
6231
6232                 si->defer_connect = 0;
6233                 break;
6234         default:
6235                 errno = EHOSTUNREACH;
6236                 goto out;
6237         }
6238
6239         ret = 0;
6240 out:
6241         SWRAP_UNLOCK_SI(si);
6242
6243         return ret;
6244 }
6245
6246 static void swrap_sendmsg_after(int fd,
6247                                 struct socket_info *si,
6248                                 struct msghdr *msg,
6249                                 const struct sockaddr *to,
6250                                 ssize_t ret)
6251 {
6252         int saved_errno = errno;
6253         size_t i, len = 0;
6254         uint8_t *buf;
6255         off_t ofs = 0;
6256         size_t avail = 0;
6257         size_t remain;
6258
6259         /* to give better errors */
6260         if (ret == -1) {
6261                 if (saved_errno == ENOENT) {
6262                         saved_errno = EHOSTUNREACH;
6263                 } else if (saved_errno == ENOTSOCK) {
6264                         /* If the fd is not a socket, remove it */
6265                         swrap_remove_stale(fd);
6266                 }
6267         }
6268
6269         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6270                 avail += msg->msg_iov[i].iov_len;
6271         }
6272
6273         if (ret == -1) {
6274                 remain = MIN(80, avail);
6275         } else {
6276                 remain = ret;
6277         }
6278
6279         /* we capture it as one single packet */
6280         buf = (uint8_t *)malloc(remain);
6281         if (!buf) {
6282                 /* we just not capture the packet */
6283                 errno = saved_errno;
6284                 return;
6285         }
6286
6287         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6288                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6289                 if (this_time > 0) {
6290                         memcpy(buf + ofs,
6291                                msg->msg_iov[i].iov_base,
6292                                this_time);
6293                 }
6294                 ofs += this_time;
6295                 remain -= this_time;
6296         }
6297         len = ofs;
6298
6299         SWRAP_LOCK_SI(si);
6300
6301         switch (si->type) {
6302         case SOCK_STREAM:
6303                 if (ret == -1) {
6304                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6305                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND_RST, NULL, 0);
6306                 } else {
6307                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6308                 }
6309                 break;
6310
6311         case SOCK_DGRAM:
6312                 if (si->connected) {
6313                         to = &si->peername.sa.s;
6314                 }
6315                 if (ret == -1) {
6316                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6317                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO_UNREACH, buf, len);
6318                 } else {
6319                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6320                 }
6321                 break;
6322         }
6323
6324         SWRAP_UNLOCK_SI(si);
6325
6326         free(buf);
6327         errno = saved_errno;
6328 }
6329
6330 static int swrap_recvmsg_before(int fd,
6331                                 struct socket_info *si,
6332                                 struct msghdr *msg,
6333                                 struct iovec *tmp_iov)
6334 {
6335         size_t i, len = 0;
6336         int ret = -1;
6337
6338         SWRAP_LOCK_SI(si);
6339
6340         (void)fd; /* unused */
6341
6342         switch (si->type) {
6343         case SOCK_STREAM: {
6344                 unsigned int mtu;
6345                 if (!si->connected) {
6346                         errno = ENOTCONN;
6347                         goto out;
6348                 }
6349
6350                 if (msg->msg_iovlen == 0) {
6351                         break;
6352                 }
6353
6354                 mtu = socket_wrapper_mtu();
6355                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6356                         size_t nlen;
6357                         nlen = len + msg->msg_iov[i].iov_len;
6358                         if (nlen > mtu) {
6359                                 break;
6360                         }
6361                 }
6362                 msg->msg_iovlen = i;
6363                 if (msg->msg_iovlen == 0) {
6364                         *tmp_iov = msg->msg_iov[0];
6365                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6366                                                (size_t)mtu);
6367                         msg->msg_iov = tmp_iov;
6368                         msg->msg_iovlen = 1;
6369                 }
6370                 break;
6371         }
6372         case SOCK_DGRAM:
6373                 if (msg->msg_name == NULL) {
6374                         errno = EINVAL;
6375                         goto out;
6376                 }
6377
6378                 if (msg->msg_iovlen == 0) {
6379                         break;
6380                 }
6381
6382                 if (si->bound == 0) {
6383                         ret = swrap_auto_bind(fd, si, si->family);
6384                         if (ret == -1) {
6385                                 SWRAP_UNLOCK_SI(si);
6386                                 /*
6387                                  * When attempting to read or write to a
6388                                  * descriptor, if an underlying autobind fails
6389                                  * because it's not a socket, stop intercepting
6390                                  * uses of that descriptor.
6391                                  */
6392                                 if (errno == ENOTSOCK) {
6393                                         swrap_remove_stale(fd);
6394                                         ret = -ENOTSOCK;
6395                                 } else {
6396                                         SWRAP_LOG(SWRAP_LOG_ERROR,
6397                                                   "swrap_recvmsg_before failed");
6398                                 }
6399                                 return ret;
6400                         }
6401                 }
6402                 break;
6403         default:
6404                 errno = EHOSTUNREACH;
6405                 goto out;
6406         }
6407
6408         ret = 0;
6409 out:
6410         SWRAP_UNLOCK_SI(si);
6411
6412         return ret;
6413 }
6414
6415 static int swrap_recvmsg_after(int fd,
6416                                struct socket_info *si,
6417                                struct msghdr *msg,
6418                                const struct sockaddr_un *un_addr,
6419                                socklen_t un_addrlen,
6420                                ssize_t ret)
6421 {
6422         int saved_errno = errno;
6423         size_t i;
6424         uint8_t *buf = NULL;
6425         off_t ofs = 0;
6426         size_t avail = 0;
6427         size_t remain;
6428         int rc;
6429
6430         /* to give better errors */
6431         if (ret == -1) {
6432                 if (saved_errno == ENOENT) {
6433                         saved_errno = EHOSTUNREACH;
6434                 } else if (saved_errno == ENOTSOCK) {
6435                         /* If the fd is not a socket, remove it */
6436                         swrap_remove_stale(fd);
6437                 }
6438         }
6439
6440         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6441                 avail += msg->msg_iov[i].iov_len;
6442         }
6443
6444         SWRAP_LOCK_SI(si);
6445
6446         /* Convert the socket address before we leave */
6447         if (si->type == SOCK_DGRAM && un_addr != NULL) {
6448                 rc = sockaddr_convert_from_un(si,
6449                                               un_addr,
6450                                               un_addrlen,
6451                                               si->family,
6452                                               msg->msg_name,
6453                                               &msg->msg_namelen);
6454                 if (rc == -1) {
6455                         goto done;
6456                 }
6457         }
6458
6459         if (avail == 0) {
6460                 rc = 0;
6461                 goto done;
6462         }
6463
6464         if (ret == -1) {
6465                 remain = MIN(80, avail);
6466         } else {
6467                 remain = ret;
6468         }
6469
6470         /* we capture it as one single packet */
6471         buf = (uint8_t *)malloc(remain);
6472         if (buf == NULL) {
6473                 /* we just not capture the packet */
6474                 SWRAP_UNLOCK_SI(si);
6475                 errno = saved_errno;
6476                 return -1;
6477         }
6478
6479         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6480                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6481                 memcpy(buf + ofs,
6482                        msg->msg_iov[i].iov_base,
6483                        this_time);
6484                 ofs += this_time;
6485                 remain -= this_time;
6486         }
6487
6488         switch (si->type) {
6489         case SOCK_STREAM:
6490                 if (ret == -1 && saved_errno != EAGAIN && saved_errno != ENOBUFS) {
6491                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6492                 } else if (ret == 0) { /* END OF FILE */
6493                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6494                 } else if (ret > 0) {
6495                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV, buf, ret);
6496                 }
6497                 break;
6498
6499         case SOCK_DGRAM:
6500                 if (ret == -1) {
6501                         break;
6502                 }
6503
6504                 if (un_addr != NULL) {
6505                         swrap_pcap_dump_packet(si,
6506                                           msg->msg_name,
6507                                           SWRAP_RECVFROM,
6508                                           buf,
6509                                           ret);
6510                 } else {
6511                         swrap_pcap_dump_packet(si,
6512                                           msg->msg_name,
6513                                           SWRAP_RECV,
6514                                           buf,
6515                                           ret);
6516                 }
6517
6518                 break;
6519         }
6520
6521         rc = 0;
6522 done:
6523         free(buf);
6524         errno = saved_errno;
6525
6526 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6527         if (rc == 0 &&
6528             msg->msg_controllen > 0 &&
6529             msg->msg_control != NULL) {
6530                 rc = swrap_msghdr_add_socket_info(si, msg);
6531                 if (rc < 0) {
6532                         SWRAP_UNLOCK_SI(si);
6533                         return -1;
6534                 }
6535         }
6536 #endif
6537
6538         SWRAP_UNLOCK_SI(si);
6539         return rc;
6540 }
6541
6542 /****************************************************************************
6543  *   RECVFROM
6544  ***************************************************************************/
6545
6546 static ssize_t swrap_recvfrom(int s, void *buf, size_t len, int flags,
6547                               struct sockaddr *from, socklen_t *fromlen)
6548 {
6549         struct swrap_address from_addr = {
6550                 .sa_socklen = sizeof(struct sockaddr_un),
6551         };
6552         ssize_t ret;
6553         struct socket_info *si = find_socket_info(s);
6554         struct swrap_address saddr = {
6555                 .sa_socklen = sizeof(struct sockaddr_storage),
6556         };
6557         struct msghdr msg;
6558         struct iovec tmp;
6559         int tret;
6560
6561         if (!si) {
6562                 return libc_recvfrom(s,
6563                                      buf,
6564                                      len,
6565                                      flags,
6566                                      from,
6567                                      fromlen);
6568         }
6569
6570         tmp.iov_base = buf;
6571         tmp.iov_len = len;
6572
6573         ZERO_STRUCT(msg);
6574         if (from != NULL && fromlen != NULL) {
6575                 msg.msg_name = from;   /* optional address */
6576                 msg.msg_namelen = *fromlen; /* size of address */
6577         } else {
6578                 msg.msg_name = &saddr.sa.s; /* optional address */
6579                 msg.msg_namelen = saddr.sa_socklen; /* size of address */
6580         }
6581         msg.msg_iov = &tmp;            /* scatter/gather array */
6582         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6583 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6584         msg.msg_control = NULL;        /* ancillary data, see below */
6585         msg.msg_controllen = 0;        /* ancillary data buffer len */
6586         msg.msg_flags = 0;             /* flags on received message */
6587 #endif
6588
6589         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6590         if (tret < 0) {
6591                 return -1;
6592         }
6593
6594         buf = msg.msg_iov[0].iov_base;
6595         len = msg.msg_iov[0].iov_len;
6596
6597         ret = libc_recvfrom(s,
6598                             buf,
6599                             len,
6600                             flags,
6601                             &from_addr.sa.s,
6602                             &from_addr.sa_socklen);
6603         if (ret == -1) {
6604                 return ret;
6605         }
6606
6607         tret = swrap_recvmsg_after(s,
6608                                    si,
6609                                    &msg,
6610                                    &from_addr.sa.un,
6611                                    from_addr.sa_socklen,
6612                                    ret);
6613         if (tret != 0) {
6614                 return tret;
6615         }
6616
6617         if (from != NULL && fromlen != NULL) {
6618                 *fromlen = msg.msg_namelen;
6619         }
6620
6621         return ret;
6622 }
6623
/*
 * Exported recvfrom() symbol, forwards to swrap_recvfrom(). The type of
 * the length argument depends on HAVE_ACCEPT_PSOCKLEN_T.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
                 struct sockaddr *from, Psocklen_t fromlen)
#else
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
                 struct sockaddr *from, socklen_t *fromlen)
#endif
{
        socklen_t *addrlen = (socklen_t *)fromlen;

        return swrap_recvfrom(s, buf, len, flags, from, addrlen);
}
6634
6635 /****************************************************************************
6636  *   SENDTO
6637  ***************************************************************************/
6638
6639 static ssize_t swrap_sendto(int s, const void *buf, size_t len, int flags,
6640                             const struct sockaddr *to, socklen_t tolen)
6641 {
6642         struct msghdr msg;
6643         struct iovec tmp;
6644         struct swrap_address un_addr = {
6645                 .sa_socklen = sizeof(struct sockaddr_un),
6646         };
6647         const struct sockaddr_un *to_un = NULL;
6648         ssize_t ret;
6649         int rc;
6650         struct socket_info *si = find_socket_info(s);
6651         int bcast = 0;
6652
6653         if (!si) {
6654                 return libc_sendto(s, buf, len, flags, to, tolen);
6655         }
6656
6657         tmp.iov_base = discard_const_p(char, buf);
6658         tmp.iov_len = len;
6659
6660         ZERO_STRUCT(msg);
6661         msg.msg_name = discard_const_p(struct sockaddr, to); /* optional address */
6662         msg.msg_namelen = tolen;       /* size of address */
6663         msg.msg_iov = &tmp;            /* scatter/gather array */
6664         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6665 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6666         msg.msg_control = NULL;        /* ancillary data, see below */
6667         msg.msg_controllen = 0;        /* ancillary data buffer len */
6668         msg.msg_flags = 0;             /* flags on received message */
6669 #endif
6670
6671         rc = swrap_sendmsg_before(s,
6672                                   si,
6673                                   &msg,
6674                                   &tmp,
6675                                   &un_addr.sa.un,
6676                                   &to_un,
6677                                   &to,
6678                                   &bcast);
6679         if (rc < 0) {
6680                 return -1;
6681         }
6682
6683         buf = msg.msg_iov[0].iov_base;
6684         len = msg.msg_iov[0].iov_len;
6685
6686         if (bcast) {
6687                 struct stat st;
6688                 unsigned int iface;
6689                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
6690                 char type;
6691                 char *swrap_dir = NULL;
6692
6693                 type = SOCKET_TYPE_CHAR_UDP;
6694
6695                 swrap_dir = socket_wrapper_dir();
6696                 if (swrap_dir == NULL) {
6697                         return -1;
6698                 }
6699
6700                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
6701                         swrap_un_path(&un_addr.sa.un,
6702                                       swrap_dir,
6703                                       type,
6704                                       iface,
6705                                       prt);
6706                         if (stat(un_addr.sa.un.sun_path, &st) != 0) continue;
6707
6708                         /* ignore the any errors in broadcast sends */
6709                         libc_sendto(s,
6710                                     buf,
6711                                     len,
6712                                     flags,
6713                                     &un_addr.sa.s,
6714                                     un_addr.sa_socklen);
6715                 }
6716
6717                 SAFE_FREE(swrap_dir);
6718
6719                 SWRAP_LOCK_SI(si);
6720
6721                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6722
6723                 SWRAP_UNLOCK_SI(si);
6724
6725                 return len;
6726         }
6727
6728         SWRAP_LOCK_SI(si);
6729         /*
6730          * If it is a dgram socket and we are connected, don't include the
6731          * 'to' address.
6732          */
6733         if (si->type == SOCK_DGRAM && si->connected) {
6734                 ret = libc_sendto(s,
6735                                   buf,
6736                                   len,
6737                                   flags,
6738                                   NULL,
6739                                   0);
6740         } else {
6741                 ret = libc_sendto(s,
6742                                   buf,
6743                                   len,
6744                                   flags,
6745                                   (struct sockaddr *)msg.msg_name,
6746                                   msg.msg_namelen);
6747         }
6748
6749         SWRAP_UNLOCK_SI(si);
6750
6751         swrap_sendmsg_after(s, si, &msg, to, ret);
6752
6753         return ret;
6754 }
6755
/* Exported sendto() symbol, forwards to swrap_sendto(). */
ssize_t sendto(int s, const void *buf, size_t len, int flags,
               const struct sockaddr *to, socklen_t tolen)
{
        ssize_t nsent = swrap_sendto(s, buf, len, flags, to, tolen);

        return nsent;
}
6761
6762 /****************************************************************************
 *   RECV
6764  ***************************************************************************/
6765
6766 static ssize_t swrap_recv(int s, void *buf, size_t len, int flags)
6767 {
6768         struct socket_info *si;
6769         struct msghdr msg;
6770         struct swrap_address saddr = {
6771                 .sa_socklen = sizeof(struct sockaddr_storage),
6772         };
6773         struct iovec tmp;
6774         ssize_t ret;
6775         int tret;
6776
6777         si = find_socket_info(s);
6778         if (si == NULL) {
6779                 return libc_recv(s, buf, len, flags);
6780         }
6781
6782         tmp.iov_base = buf;
6783         tmp.iov_len = len;
6784
6785         ZERO_STRUCT(msg);
6786         msg.msg_name = &saddr.sa.s;    /* optional address */
6787         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6788         msg.msg_iov = &tmp;            /* scatter/gather array */
6789         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6790 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6791         msg.msg_control = NULL;        /* ancillary data, see below */
6792         msg.msg_controllen = 0;        /* ancillary data buffer len */
6793         msg.msg_flags = 0;             /* flags on received message */
6794 #endif
6795
6796         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6797         if (tret < 0) {
6798                 return -1;
6799         }
6800
6801         buf = msg.msg_iov[0].iov_base;
6802         len = msg.msg_iov[0].iov_len;
6803
6804         ret = libc_recv(s, buf, len, flags);
6805
6806         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6807         if (tret != 0) {
6808                 return tret;
6809         }
6810
6811         return ret;
6812 }
6813
/* Exported recv() symbol, forwards to swrap_recv(). */
ssize_t recv(int s, void *buf, size_t len, int flags)
{
        ssize_t nread = swrap_recv(s, buf, len, flags);

        return nread;
}
6818
6819 /****************************************************************************
6820  *   READ
6821  ***************************************************************************/
6822
6823 static ssize_t swrap_read(int s, void *buf, size_t len)
6824 {
6825         struct socket_info *si;
6826         struct msghdr msg;
6827         struct iovec tmp;
6828         struct swrap_address saddr = {
6829                 .sa_socklen = sizeof(struct sockaddr_storage),
6830         };
6831         ssize_t ret;
6832         int tret;
6833
6834         si = find_socket_info(s);
6835         if (si == NULL) {
6836                 return libc_read(s, buf, len);
6837         }
6838
6839         tmp.iov_base = buf;
6840         tmp.iov_len = len;
6841
6842         ZERO_STRUCT(msg);
6843         msg.msg_name = &saddr.sa.ss;   /* optional address */
6844         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6845         msg.msg_iov = &tmp;            /* scatter/gather array */
6846         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6847 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6848         msg.msg_control = NULL;        /* ancillary data, see below */
6849         msg.msg_controllen = 0;        /* ancillary data buffer len */
6850         msg.msg_flags = 0;             /* flags on received message */
6851 #endif
6852
6853         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6854         if (tret < 0) {
6855                 if (tret == -ENOTSOCK) {
6856                         return libc_read(s, buf, len);
6857                 }
6858                 return -1;
6859         }
6860
6861         buf = msg.msg_iov[0].iov_base;
6862         len = msg.msg_iov[0].iov_len;
6863
6864         ret = libc_read(s, buf, len);
6865
6866         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6867         if (tret != 0) {
6868                 return tret;
6869         }
6870
6871         return ret;
6872 }
6873
/* Exported read() symbol, forwards to swrap_read(). */
ssize_t read(int s, void *buf, size_t len)
{
        ssize_t nread = swrap_read(s, buf, len);

        return nread;
}
6878
6879 /****************************************************************************
6880  *   WRITE
6881  ***************************************************************************/
6882
6883 static ssize_t swrap_write(int s, const void *buf, size_t len)
6884 {
6885         struct msghdr msg;
6886         struct iovec tmp;
6887         struct sockaddr_un un_addr;
6888         ssize_t ret;
6889         int rc;
6890         struct socket_info *si;
6891
6892         si = find_socket_info(s);
6893         if (si == NULL) {
6894                 return libc_write(s, buf, len);
6895         }
6896
6897         tmp.iov_base = discard_const_p(char, buf);
6898         tmp.iov_len = len;
6899
6900         ZERO_STRUCT(msg);
6901         msg.msg_name = NULL;           /* optional address */
6902         msg.msg_namelen = 0;           /* size of address */
6903         msg.msg_iov = &tmp;            /* scatter/gather array */
6904         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6905 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6906         msg.msg_control = NULL;        /* ancillary data, see below */
6907         msg.msg_controllen = 0;        /* ancillary data buffer len */
6908         msg.msg_flags = 0;             /* flags on received message */
6909 #endif
6910
6911         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
6912         if (rc < 0) {
6913                 return -1;
6914         }
6915
6916         buf = msg.msg_iov[0].iov_base;
6917         len = msg.msg_iov[0].iov_len;
6918
6919         ret = libc_write(s, buf, len);
6920
6921         swrap_sendmsg_after(s, si, &msg, NULL, ret);
6922
6923         return ret;
6924 }
6925
/* Exported write() symbol, forwards to swrap_write(). */
ssize_t write(int s, const void *buf, size_t len)
{
        ssize_t nwritten = swrap_write(s, buf, len);

        return nwritten;
}
6930
6931 /****************************************************************************
6932  *   SEND
6933  ***************************************************************************/
6934
6935 static ssize_t swrap_send(int s, const void *buf, size_t len, int flags)
6936 {
6937         struct msghdr msg;
6938         struct iovec tmp;
6939         struct sockaddr_un un_addr;
6940         ssize_t ret;
6941         int rc;
6942         struct socket_info *si = find_socket_info(s);
6943
6944         if (!si) {
6945                 return libc_send(s, buf, len, flags);
6946         }
6947
6948         tmp.iov_base = discard_const_p(char, buf);
6949         tmp.iov_len = len;
6950
6951         ZERO_STRUCT(msg);
6952         msg.msg_name = NULL;           /* optional address */
6953         msg.msg_namelen = 0;           /* size of address */
6954         msg.msg_iov = &tmp;            /* scatter/gather array */
6955         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6956 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6957         msg.msg_control = NULL;        /* ancillary data, see below */
6958         msg.msg_controllen = 0;        /* ancillary data buffer len */
6959         msg.msg_flags = 0;             /* flags on received message */
6960 #endif
6961
6962         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
6963         if (rc < 0) {
6964                 return -1;
6965         }
6966
6967         buf = msg.msg_iov[0].iov_base;
6968         len = msg.msg_iov[0].iov_len;
6969
6970         ret = libc_send(s, buf, len, flags);
6971
6972         swrap_sendmsg_after(s, si, &msg, NULL, ret);
6973
6974         return ret;
6975 }
6976
/* Exported send() symbol, forwards to swrap_send(). */
ssize_t send(int s, const void *buf, size_t len, int flags)
{
        ssize_t nsent = swrap_send(s, buf, len, flags);

        return nsent;
}
6981
6982 /****************************************************************************
6983  *   RECVMSG
6984  ***************************************************************************/
6985
6986 static ssize_t swrap_recvmsg(int s, struct msghdr *omsg, int flags)
6987 {
6988         struct swrap_address from_addr = {
6989                 .sa_socklen = sizeof(struct sockaddr_un),
6990         };
6991         struct swrap_address convert_addr = {
6992                 .sa_socklen = sizeof(struct sockaddr_storage),
6993         };
6994         struct socket_info *si;
6995         struct msghdr msg;
6996         struct iovec tmp;
6997 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6998         size_t msg_ctrllen_filled;
6999         size_t msg_ctrllen_left;
7000 #endif
7001
7002         ssize_t ret;
7003         int rc;
7004
7005         si = find_socket_info(s);
7006         if (si == NULL) {
7007                 uint8_t *tmp_control = NULL;
7008                 rc = swrap_recvmsg_before_unix(omsg, &msg, &tmp_control);
7009                 if (rc < 0) {
7010                         return rc;
7011                 }
7012                 ret = libc_recvmsg(s, &msg, flags);
7013                 return swrap_recvmsg_after_unix(&msg, &tmp_control, omsg, ret);
7014         }
7015
7016         tmp.iov_base = NULL;
7017         tmp.iov_len = 0;
7018
7019         ZERO_STRUCT(msg);
7020         msg.msg_name = &from_addr.sa;              /* optional address */
7021         msg.msg_namelen = from_addr.sa_socklen;    /* size of address */
7022         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7023         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7024 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7025         msg_ctrllen_filled = 0;
7026         msg_ctrllen_left = omsg->msg_controllen;
7027
7028         msg.msg_control = omsg->msg_control;       /* ancillary data, see below */
7029         msg.msg_controllen = omsg->msg_controllen; /* ancillary data buffer len */
7030         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7031 #endif
7032
7033         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7034         if (rc < 0) {
7035                 return -1;
7036         }
7037
7038         ret = libc_recvmsg(s, &msg, flags);
7039
7040 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7041         msg_ctrllen_filled += msg.msg_controllen;
7042         msg_ctrllen_left -= msg.msg_controllen;
7043
7044         if (omsg->msg_control != NULL) {
7045                 uint8_t *p;
7046
7047                 p = omsg->msg_control;
7048                 p += msg_ctrllen_filled;
7049
7050                 msg.msg_control = p;
7051                 msg.msg_controllen = msg_ctrllen_left;
7052         } else {
7053                 msg.msg_control = NULL;
7054                 msg.msg_controllen = 0;
7055         }
7056 #endif
7057
7058         /*
7059          * We convert the unix address to a IP address so we need a buffer
7060          * which can store the address in case of SOCK_DGRAM, see below.
7061          */
7062         msg.msg_name = &convert_addr.sa;
7063         msg.msg_namelen = convert_addr.sa_socklen;
7064
7065         rc = swrap_recvmsg_after(s,
7066                                  si,
7067                                  &msg,
7068                                  &from_addr.sa.un,
7069                                  from_addr.sa_socklen,
7070                                  ret);
7071         if (rc != 0) {
7072                 return rc;
7073         }
7074
7075 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7076         if (omsg->msg_control != NULL) {
7077                 /* msg.msg_controllen = space left */
7078                 msg_ctrllen_left = msg.msg_controllen;
7079                 msg_ctrllen_filled = omsg->msg_controllen - msg_ctrllen_left;
7080         }
7081
7082         /* Update the original message length */
7083         omsg->msg_controllen = msg_ctrllen_filled;
7084         omsg->msg_flags = msg.msg_flags;
7085 #endif
7086         omsg->msg_iovlen = msg.msg_iovlen;
7087
7088         SWRAP_LOCK_SI(si);
7089
7090         /*
7091          * From the manpage:
7092          *
7093          * The  msg_name  field  points  to a caller-allocated buffer that is
7094          * used to return the source address if the socket is unconnected.  The
7095          * caller should set msg_namelen to the size of this buffer before this
7096          * call; upon return from a successful call, msg_name will contain the
7097          * length of the returned address.  If the application  does  not  need
7098          * to know the source address, msg_name can be specified as NULL.
7099          */
7100         if (si->type == SOCK_STREAM) {
7101                 omsg->msg_namelen = 0;
7102         } else if (omsg->msg_name != NULL &&
7103                    omsg->msg_namelen != 0 &&
7104                    omsg->msg_namelen >= msg.msg_namelen) {
7105                 memcpy(omsg->msg_name, msg.msg_name, msg.msg_namelen);
7106                 omsg->msg_namelen = msg.msg_namelen;
7107         }
7108
7109         SWRAP_UNLOCK_SI(si);
7110
7111         return ret;
7112 }
7113
/* Exported recvmsg() symbol, forwards to swrap_recvmsg(). */
ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)
{
        ssize_t nread = swrap_recvmsg(sockfd, msg, flags);

        return nread;
}
7118
7119 /****************************************************************************
7120  *   SENDMSG
7121  ***************************************************************************/
7122
7123 static ssize_t swrap_sendmsg(int s, const struct msghdr *omsg, int flags)
7124 {
7125         struct msghdr msg;
7126         struct iovec tmp;
7127         struct sockaddr_un un_addr;
7128         const struct sockaddr_un *to_un = NULL;
7129         const struct sockaddr *to = NULL;
7130         ssize_t ret;
7131         int rc;
7132         struct socket_info *si = find_socket_info(s);
7133         int bcast = 0;
7134
7135         if (!si) {
7136                 int scm_rights_pipe_fd = -1;
7137
7138                 rc = swrap_sendmsg_before_unix(omsg, &msg,
7139                                                &scm_rights_pipe_fd);
7140                 if (rc < 0) {
7141                         return rc;
7142                 }
7143                 ret = libc_sendmsg(s, &msg, flags);
7144                 return swrap_sendmsg_after_unix(&msg, ret, scm_rights_pipe_fd);
7145         }
7146
7147         ZERO_STRUCT(un_addr);
7148
7149         tmp.iov_base = NULL;
7150         tmp.iov_len = 0;
7151
7152         ZERO_STRUCT(msg);
7153
7154         SWRAP_LOCK_SI(si);
7155
7156         if (si->connected == 0) {
7157                 msg.msg_name = omsg->msg_name;             /* optional address */
7158                 msg.msg_namelen = omsg->msg_namelen;       /* size of address */
7159         }
7160         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7161         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7162
7163         SWRAP_UNLOCK_SI(si);
7164
7165 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7166         if (omsg != NULL && omsg->msg_controllen > 0 && omsg->msg_control != NULL) {
7167                 uint8_t *cmbuf = NULL;
7168                 size_t cmlen = 0;
7169
7170                 rc = swrap_sendmsg_filter_cmsghdr(omsg, &cmbuf, &cmlen);
7171                 if (rc < 0) {
7172                         return rc;
7173                 }
7174
7175                 if (cmlen == 0) {
7176                         msg.msg_controllen = 0;
7177                         msg.msg_control = NULL;
7178                 } else {
7179                         msg.msg_control = cmbuf;
7180                         msg.msg_controllen = cmlen;
7181                 }
7182         }
7183         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7184 #endif
7185         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, &to_un, &to, &bcast);
7186         if (rc < 0) {
7187                 int saved_errno = errno;
7188 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7189                 SAFE_FREE(msg.msg_control);
7190 #endif
7191                 errno = saved_errno;
7192                 return -1;
7193         }
7194
7195         if (bcast) {
7196                 struct stat st;
7197                 unsigned int iface;
7198                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
7199                 char type;
7200                 size_t i, len = 0;
7201                 uint8_t *buf;
7202                 off_t ofs = 0;
7203                 size_t avail = 0;
7204                 size_t remain;
7205                 char *swrap_dir = NULL;
7206
7207                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7208                         avail += msg.msg_iov[i].iov_len;
7209                 }
7210
7211                 len = avail;
7212                 remain = avail;
7213
7214                 /* we capture it as one single packet */
7215                 buf = (uint8_t *)malloc(remain);
7216                 if (!buf) {
7217                         int saved_errno = errno;
7218 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7219                         SAFE_FREE(msg.msg_control);
7220 #endif
7221                         errno = saved_errno;
7222                         return -1;
7223                 }
7224
7225                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7226                         size_t this_time = MIN(remain, (size_t)msg.msg_iov[i].iov_len);
7227                         memcpy(buf + ofs,
7228                                msg.msg_iov[i].iov_base,
7229                                this_time);
7230                         ofs += this_time;
7231                         remain -= this_time;
7232                 }
7233
7234                 type = SOCKET_TYPE_CHAR_UDP;
7235
7236                 swrap_dir = socket_wrapper_dir();
7237                 if (swrap_dir == NULL) {
7238                         int saved_errno = errno;
7239 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7240                         SAFE_FREE(msg.msg_control);
7241 #endif
7242                         SAFE_FREE(buf);
7243                         errno = saved_errno;
7244                         return -1;
7245                 }
7246
7247                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
7248                         swrap_un_path(&un_addr, swrap_dir, type, iface, prt);
7249                         if (stat(un_addr.sun_path, &st) != 0) continue;
7250
7251                         msg.msg_name = &un_addr;           /* optional address */
7252                         msg.msg_namelen = sizeof(un_addr); /* size of address */
7253
7254                         /* ignore the any errors in broadcast sends */
7255                         libc_sendmsg(s, &msg, flags);
7256                 }
7257
7258                 SAFE_FREE(swrap_dir);
7259
7260                 SWRAP_LOCK_SI(si);
7261
7262                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
7263                 free(buf);
7264
7265                 SWRAP_UNLOCK_SI(si);
7266
7267                 return len;
7268         }
7269
7270         ret = libc_sendmsg(s, &msg, flags);
7271
7272         swrap_sendmsg_after(s, si, &msg, to, ret);
7273
7274 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7275         {
7276                 int saved_errno = errno;
7277                 SAFE_FREE(msg.msg_control);
7278                 errno = saved_errno;
7279         }
7280 #endif
7281
7282         return ret;
7283 }
7284
/* Interposed sendmsg(): the call is handled by swrap_sendmsg(). */
ssize_t sendmsg(int s, const struct msghdr *omsg, int flags)
{
        ssize_t nsent = swrap_sendmsg(s, omsg, flags);

        return nsent;
}
7289
7290 /****************************************************************************
7291  *   READV
7292  ***************************************************************************/
7293
7294 static ssize_t swrap_readv(int s, const struct iovec *vector, int count)
7295 {
7296         struct socket_info *si;
7297         struct msghdr msg;
7298         struct iovec tmp;
7299         struct swrap_address saddr = {
7300                 .sa_socklen = sizeof(struct sockaddr_storage)
7301         };
7302         ssize_t ret;
7303         int rc;
7304
7305         si = find_socket_info(s);
7306         if (si == NULL) {
7307                 return libc_readv(s, vector, count);
7308         }
7309
7310         tmp.iov_base = NULL;
7311         tmp.iov_len = 0;
7312
7313         ZERO_STRUCT(msg);
7314         msg.msg_name = &saddr.sa.s; /* optional address */
7315         msg.msg_namelen = saddr.sa_socklen;      /* size of address */
7316         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7317         msg.msg_iovlen = count;        /* # elements in msg_iov */
7318 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7319         msg.msg_control = NULL;        /* ancillary data, see below */
7320         msg.msg_controllen = 0;        /* ancillary data buffer len */
7321         msg.msg_flags = 0;             /* flags on received message */
7322 #endif
7323
7324         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7325         if (rc < 0) {
7326                 if (rc == -ENOTSOCK) {
7327                         return libc_readv(s, vector, count);
7328                 }
7329                 return -1;
7330         }
7331
7332         ret = libc_readv(s, msg.msg_iov, msg.msg_iovlen);
7333
7334         rc = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
7335         if (rc != 0) {
7336                 return rc;
7337         }
7338
7339         return ret;
7340 }
7341
/* Interposed readv(): the call is handled by swrap_readv(). */
ssize_t readv(int s, const struct iovec *vector, int count)
{
        ssize_t nread = swrap_readv(s, vector, count);

        return nread;
}
7346
7347 /****************************************************************************
7348  *   WRITEV
7349  ***************************************************************************/
7350
7351 static ssize_t swrap_writev(int s, const struct iovec *vector, int count)
7352 {
7353         struct msghdr msg;
7354         struct iovec tmp;
7355         struct sockaddr_un un_addr;
7356         ssize_t ret;
7357         int rc;
7358         struct socket_info *si = find_socket_info(s);
7359
7360         if (!si) {
7361                 return libc_writev(s, vector, count);
7362         }
7363
7364         tmp.iov_base = NULL;
7365         tmp.iov_len = 0;
7366
7367         ZERO_STRUCT(msg);
7368         msg.msg_name = NULL;           /* optional address */
7369         msg.msg_namelen = 0;           /* size of address */
7370         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7371         msg.msg_iovlen = count;        /* # elements in msg_iov */
7372 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7373         msg.msg_control = NULL;        /* ancillary data, see below */
7374         msg.msg_controllen = 0;        /* ancillary data buffer len */
7375         msg.msg_flags = 0;             /* flags on received message */
7376 #endif
7377
7378         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
7379         if (rc < 0) {
7380                 if (rc == -ENOTSOCK) {
7381                         return libc_readv(s, vector, count);
7382                 }
7383                 return -1;
7384         }
7385
7386         ret = libc_writev(s, msg.msg_iov, msg.msg_iovlen);
7387
7388         swrap_sendmsg_after(s, si, &msg, NULL, ret);
7389
7390         return ret;
7391 }
7392
/* Interposed writev(): the call is handled by swrap_writev(). */
ssize_t writev(int s, const struct iovec *vector, int count)
{
        ssize_t nwritten = swrap_writev(s, vector, count);

        return nwritten;
}
7397
7398 /****************************
7399  * CLOSE
7400  ***************************/
7401
7402 static int swrap_remove_wrapper(const char *__func_name,
7403                                 int (*__close_fd_fn)(int fd),
7404                                 int fd)
7405 {
7406         struct socket_info *si = NULL;
7407         int si_index;
7408         int ret_errno = errno;
7409         int ret;
7410
7411         swrap_mutex_lock(&socket_reset_mutex);
7412
7413         si_index = find_socket_info_index(fd);
7414         if (si_index == -1) {
7415                 swrap_mutex_unlock(&socket_reset_mutex);
7416                 return __close_fd_fn(fd);
7417         }
7418
7419         swrap_log(SWRAP_LOG_TRACE, __func_name, "Remove wrapper for fd=%d", fd);
7420         reset_socket_info_index(fd);
7421
7422         si = swrap_get_socket_info(si_index);
7423
7424         swrap_mutex_lock(&first_free_mutex);
7425         SWRAP_LOCK_SI(si);
7426
7427         ret = __close_fd_fn(fd);
7428         if (ret == -1) {
7429                 ret_errno = errno;
7430         }
7431
7432         swrap_dec_refcount(si);
7433
7434         if (swrap_get_refcount(si) > 0) {
7435                 /* there are still references left */
7436                 goto out;
7437         }
7438
7439         if (si->fd_passed) {
7440                 goto set_next_free;
7441         }
7442
7443         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
7444                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_SEND, NULL, 0);
7445         }
7446
7447         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
7448                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_RECV, NULL, 0);
7449                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_ACK, NULL, 0);
7450         }
7451
7452         if (si->un_addr.sun_path[0] != '\0') {
7453                 unlink(si->un_addr.sun_path);
7454         }
7455
7456 set_next_free:
7457         swrap_set_next_free(si, first_free);
7458         first_free = si_index;
7459
7460 out:
7461         SWRAP_UNLOCK_SI(si);
7462         swrap_mutex_unlock(&first_free_mutex);
7463         swrap_mutex_unlock(&socket_reset_mutex);
7464
7465         errno = ret_errno;
7466         return ret;
7467 }
7468
/*
 * Close callback that leaves the descriptor alone and reports success.
 */
static int swrap_noop_close(int fd)
{
        /* the descriptor is intentionally not touched */
        (void)fd;

        return 0;
}
7474
7475 static void swrap_remove_stale(int fd)
7476 {
7477         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
7478 }
7479
7480 /*
7481  * This allows socket_wrapper aware applications to
7482  * indicate that the given fd does not belong to
7483  * an inet socket.
7484  *
7485  * We already overload a lot of unrelated functions
7486  * like eventfd(), timerfd_create(), ... in order to
7487  * call swrap_remove_stale() on the returned fd, but
7488  * we'll never be able to handle all possible syscalls.
7489  *
7490  * socket_wrapper_indicate_no_inet_fd() gives them a way
7491  * to do the same.
7492  *
7493  * We don't export swrap_remove_stale() in order to
7494  * make it easier to analyze SOCKET_WRAPPER_DEBUGLEVEL=3
7495  * log files.
7496  */
7497 void socket_wrapper_indicate_no_inet_fd(int fd)
7498 {
7499         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
7500 }
7501
7502 static int swrap_close(int fd)
7503 {
7504         return swrap_remove_wrapper(__func__, libc_close, fd);
7505 }
7506
/* Interposed close(): the call is handled by swrap_close(). */
int close(int fd)
{
        int rc = swrap_close(fd);

        return rc;
}
7511
7512 #ifdef HAVE___CLOSE_NOCANCEL
7513
7514 static int swrap___close_nocancel(int fd)
7515 {
7516         return swrap_remove_wrapper(__func__, libc___close_nocancel, fd);
7517 }
7518
/* Interposed __close_nocancel(); declared here right before its definition. */
int __close_nocancel(int fd);
int __close_nocancel(int fd)
{
        int rc = swrap___close_nocancel(fd);

        return rc;
}
7524
7525 #endif /* HAVE___CLOSE_NOCANCEL */
7526
7527 /****************************
7528  * DUP
7529  ***************************/
7530
7531 static int swrap_dup(int fd)
7532 {
7533         struct socket_info *si;
7534         int dup_fd, idx;
7535
7536         idx = find_socket_info_index(fd);
7537         if (idx == -1) {
7538                 return libc_dup(fd);
7539         }
7540
7541         si = swrap_get_socket_info(idx);
7542
7543         dup_fd = libc_dup(fd);
7544         if (dup_fd == -1) {
7545                 int saved_errno = errno;
7546                 errno = saved_errno;
7547                 return -1;
7548         }
7549
7550         if ((size_t)dup_fd >= socket_fds_max) {
7551                 SWRAP_LOG(SWRAP_LOG_ERROR,
7552                           "The max socket index limit of %zu has been reached, "
7553                           "trying to add %d",
7554                           socket_fds_max,
7555                           dup_fd);
7556                 libc_close(dup_fd);
7557                 errno = EMFILE;
7558                 return -1;
7559         }
7560
7561         SWRAP_LOCK_SI(si);
7562
7563         swrap_inc_refcount(si);
7564
7565         SWRAP_UNLOCK_SI(si);
7566
7567         /* Make sure we don't have an entry for the fd */
7568         swrap_remove_stale(dup_fd);
7569
7570         set_socket_info_index(dup_fd, idx);
7571
7572         return dup_fd;
7573 }
7574
/* Interposed dup(): the call is handled by swrap_dup(). */
int dup(int fd)
{
        int new_fd = swrap_dup(fd);

        return new_fd;
}
7579
7580 /****************************
7581  * DUP2
7582  ***************************/
7583
7584 static int swrap_dup2(int fd, int newfd)
7585 {
7586         struct socket_info *si;
7587         int dup_fd, idx;
7588
7589         idx = find_socket_info_index(fd);
7590         if (idx == -1) {
7591                 return libc_dup2(fd, newfd);
7592         }
7593
7594         si = swrap_get_socket_info(idx);
7595
7596         if (fd == newfd) {
7597                 /*
7598                  * According to the manpage:
7599                  *
7600                  * "If oldfd is a valid file descriptor, and newfd has the same
7601                  * value as oldfd, then dup2() does nothing, and returns newfd."
7602                  */
7603                 return newfd;
7604         }
7605
7606         if ((size_t)newfd >= socket_fds_max) {
7607                 SWRAP_LOG(SWRAP_LOG_ERROR,
7608                           "The max socket index limit of %zu has been reached, "
7609                           "trying to add %d",
7610                           socket_fds_max,
7611                           newfd);
7612                 errno = EMFILE;
7613                 return -1;
7614         }
7615
7616         if (find_socket_info(newfd)) {
7617                 /* dup2() does an implicit close of newfd, which we
7618                  * need to emulate */
7619                 swrap_close(newfd);
7620         }
7621
7622         dup_fd = libc_dup2(fd, newfd);
7623         if (dup_fd == -1) {
7624                 int saved_errno = errno;
7625                 errno = saved_errno;
7626                 return -1;
7627         }
7628
7629         SWRAP_LOCK_SI(si);
7630
7631         swrap_inc_refcount(si);
7632
7633         SWRAP_UNLOCK_SI(si);
7634
7635         /* Make sure we don't have an entry for the fd */
7636         swrap_remove_stale(dup_fd);
7637
7638         set_socket_info_index(dup_fd, idx);
7639
7640         return dup_fd;
7641 }
7642
/* Interposed dup2(): the call is handled by swrap_dup2(). */
int dup2(int fd, int newfd)
{
        int result_fd = swrap_dup2(fd, newfd);

        return result_fd;
}
7647
7648 /****************************
7649  * FCNTL
7650  ***************************/
7651
7652 static int swrap_vfcntl(int fd, int cmd, va_list va)
7653 {
7654         struct socket_info *si;
7655         int rc, dup_fd, idx;
7656
7657         idx = find_socket_info_index(fd);
7658         if (idx == -1) {
7659                 return libc_vfcntl(fd, cmd, va);
7660         }
7661
7662         si = swrap_get_socket_info(idx);
7663
7664         switch (cmd) {
7665         case F_DUPFD:
7666                 dup_fd = libc_vfcntl(fd, cmd, va);
7667                 if (dup_fd == -1) {
7668                         int saved_errno = errno;
7669                         errno = saved_errno;
7670                         return -1;
7671                 }
7672
7673                 /* Make sure we don't have an entry for the fd */
7674                 swrap_remove_stale(dup_fd);
7675
7676                 if ((size_t)dup_fd >= socket_fds_max) {
7677                         SWRAP_LOG(SWRAP_LOG_ERROR,
7678                           "The max socket index limit of %zu has been reached, "
7679                           "trying to add %d",
7680                           socket_fds_max,
7681                           dup_fd);
7682                         libc_close(dup_fd);
7683                         errno = EMFILE;
7684                         return -1;
7685                 }
7686
7687                 SWRAP_LOCK_SI(si);
7688
7689                 swrap_inc_refcount(si);
7690
7691                 SWRAP_UNLOCK_SI(si);
7692
7693
7694                 set_socket_info_index(dup_fd, idx);
7695
7696                 rc = dup_fd;
7697                 break;
7698         default:
7699                 rc = libc_vfcntl(fd, cmd, va);
7700                 break;
7701         }
7702
7703         return rc;
7704 }
7705
/*
 * Interposed fcntl(): collects the variadic argument list and lets
 * swrap_vfcntl() do the work.
 */
int fcntl(int fd, int cmd, ...)
{
        va_list args;
        int result;

        va_start(args, cmd);
        result = swrap_vfcntl(fd, cmd, args);
        va_end(args);

        return result;
}
7719
7720 /****************************
7721  * EVENTFD
7722  ***************************/
7723
7724 #ifdef HAVE_EVENTFD
/*
 * Wrapped implementation of eventfd(): creates the descriptor via
 * libc_eventfd() and, on success, removes any stale wrapper state that
 * is still recorded for the returned fd number.
 */
static int swrap_eventfd(int count, int flags)
{
        int efd = libc_eventfd(count, flags);

        if (efd == -1) {
                return efd;
        }

        swrap_remove_stale(efd);

        return efd;
}
7736
/*
 * Interposed eventfd(); the type of count depends on
 * HAVE_EVENTFD_UNSIGNED_INT.
 */
#ifdef HAVE_EVENTFD_UNSIGNED_INT
int eventfd(unsigned int count, int flags)
#else
int eventfd(int count, int flags)
#endif
{
        int efd = swrap_eventfd(count, flags);

        return efd;
}
7745 #endif
7746
7747 #ifdef HAVE_PLEDGE
/*
 * Replacement for pledge(): both arguments are ignored and success (0)
 * is always reported.
 */
int pledge(const char *promises, const char *paths[])
{
        /* neither argument is looked at */
        (void)paths;
        (void)promises;

        return 0;
}
7755 #endif /* HAVE_PLEDGE */
7756
7757 static void swrap_thread_prepare(void)
7758 {
7759         /*
7760          * This function should only be called here!!
7761          *
7762          * We bind all symobls to avoid deadlocks of the fork is
7763          * interrupted by a signal handler using a symbol of this
7764          * library.
7765          */
7766         swrap_bind_symbol_all();
7767
7768         SWRAP_LOCK_ALL;
7769 }
7770
7771 static void swrap_thread_parent(void)
7772 {
7773         SWRAP_UNLOCK_ALL;
7774 }
7775
7776 static void swrap_thread_child(void)
7777 {
7778         SWRAP_REINIT_ALL;
7779 }
7780
7781 /****************************
7782  * CONSTRUCTOR
7783  ***************************/
7784 void swrap_constructor(void)
7785 {
7786         if (PIPE_BUF < sizeof(struct swrap_unix_scm_rights)) {
7787                 SWRAP_LOG(SWRAP_LOG_ERROR,
7788                           "PIPE_BUF=%zu < "
7789                           "sizeof(struct swrap_unix_scm_rights)=%zu\n"
7790                           "sizeof(struct swrap_unix_scm_rights_payload)=%zu "
7791                           "sizeof(struct socket_info)=%zu",
7792                           (size_t)PIPE_BUF,
7793                           sizeof(struct swrap_unix_scm_rights),
7794                           sizeof(struct swrap_unix_scm_rights_payload),
7795                           sizeof(struct socket_info));
7796                 exit(-1);
7797         }
7798
7799         SWRAP_REINIT_ALL;
7800
7801         /*
7802         * If we hold a lock and the application forks, then the child
7803         * is not able to unlock the mutex and we are in a deadlock.
7804         * This should prevent such deadlocks.
7805         */
7806         pthread_atfork(&swrap_thread_prepare,
7807                        &swrap_thread_parent,
7808                        &swrap_thread_child);
7809 }
7810
7811 /****************************
7812  * DESTRUCTOR
7813  ***************************/
7814
7815 /*
7816  * This function is called when the library is unloaded and makes sure that
7817  * sockets get closed and the unix file for the socket are unlinked.
7818  */
7819 void swrap_destructor(void)
7820 {
7821         size_t i;
7822
7823         if (socket_fds_idx != NULL) {
7824                 for (i = 0; i < socket_fds_max; ++i) {
7825                         if (socket_fds_idx[i] != -1) {
7826                                 swrap_close(i);
7827                         }
7828                 }
7829                 SAFE_FREE(socket_fds_idx);
7830         }
7831
7832         SAFE_FREE(sockets);
7833
7834         if (swrap.libc.handle != NULL
7835 #ifdef RTLD_NEXT
7836             && swrap.libc.handle != RTLD_NEXT
7837 #endif
7838                         ) {
7839                 dlclose(swrap.libc.handle);
7840         }
7841         if (swrap.libc.socket_handle
7842 #ifdef RTLD_NEXT
7843             && swrap.libc.socket_handle != RTLD_NEXT
7844 #endif
7845                         ) {
7846                 dlclose(swrap.libc.socket_handle);
7847         }
7848 }
7849
#if defined(HAVE__SOCKET) && defined(HAVE__CLOSE)
/*
 * On FreeBSD 12 (and maybe other platforms)
 * system libraries like libresolv prefix their
 * syscalls with '_' in order to always use
 * the symbols from libc.
 *
 * In the interaction with resolv_wrapper,
 * we need to inject socket wrapper into libresolv,
 * which means we need to provide all socket
 * related syscalls also with the '_' prefix.
 *
 * This is tested in Samba's 'make test',
 * there we noticed that providing '_read',
 * '_open' and '_close' would cause errors, which
 * means we skip '_read', '_write' and
 * all non socket related calls without
 * further analyzing the problem.
 */

/*
 * Declare __aliassym with the type of __sym and make it an alias
 * for the symbol __sym.
 */
#define SWRAP_SYMBOL_ALIAS(__sym, __aliassym) \
        extern typeof(__sym) __aliassym __attribute__ ((alias(#__sym)))

#ifdef HAVE_ACCEPT4
SWRAP_SYMBOL_ALIAS(accept4, _accept4);
#endif
SWRAP_SYMBOL_ALIAS(accept, _accept);
SWRAP_SYMBOL_ALIAS(bind, _bind);
SWRAP_SYMBOL_ALIAS(connect, _connect);
SWRAP_SYMBOL_ALIAS(dup, _dup);
SWRAP_SYMBOL_ALIAS(dup2, _dup2);
SWRAP_SYMBOL_ALIAS(fcntl, _fcntl);
SWRAP_SYMBOL_ALIAS(getpeername, _getpeername);
SWRAP_SYMBOL_ALIAS(getsockname, _getsockname);
SWRAP_SYMBOL_ALIAS(getsockopt, _getsockopt);
SWRAP_SYMBOL_ALIAS(ioctl, _ioctl);
SWRAP_SYMBOL_ALIAS(listen, _listen);
SWRAP_SYMBOL_ALIAS(readv, _readv);
SWRAP_SYMBOL_ALIAS(recv, _recv);
SWRAP_SYMBOL_ALIAS(recvfrom, _recvfrom);
SWRAP_SYMBOL_ALIAS(recvmsg, _recvmsg);
SWRAP_SYMBOL_ALIAS(send, _send);
SWRAP_SYMBOL_ALIAS(sendmsg, _sendmsg);
SWRAP_SYMBOL_ALIAS(sendto, _sendto);
SWRAP_SYMBOL_ALIAS(setsockopt, _setsockopt);
SWRAP_SYMBOL_ALIAS(socket, _socket);
SWRAP_SYMBOL_ALIAS(socketpair, _socketpair);
SWRAP_SYMBOL_ALIAS(writev, _writev);

#endif /* HAVE__SOCKET && HAVE__CLOSE */