bedda07a72b939cb8a302b40bca174d048925009
[socket_wrapper.git] / src / socket_wrapper.c
1 /*
2  * BSD 3-Clause License
3  *
4  * Copyright (c) 2005-2008, Jelmer Vernooij <jelmer@samba.org>
5  * Copyright (c) 2006-2021, Stefan Metzmacher <metze@samba.org>
6  * Copyright (c) 2013-2021, Andreas Schneider <asn@samba.org>
7  * Copyright (c) 2014-2017, Michael Adam <obnox@samba.org>
8  * Copyright (c) 2016-2018, Anoop C S <anoopcs@redhat.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * 3. Neither the name of the author nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38
39 /*
40    Socket wrapper library. Passes all socket communication over
41    unix domain sockets if the environment variable SOCKET_WRAPPER_DIR
42    is set.
43 */
44
45 #include "config.h"
46
47 #include <sys/types.h>
48 #include <sys/time.h>
49 #include <sys/stat.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #ifdef HAVE_SYS_FILIO_H
53 #include <sys/filio.h>
54 #endif
55 #ifdef HAVE_SYS_SIGNALFD_H
56 #include <sys/signalfd.h>
57 #endif
58 #ifdef HAVE_SYS_EVENTFD_H
59 #include <sys/eventfd.h>
60 #endif
61 #ifdef HAVE_SYS_TIMERFD_H
62 #include <sys/timerfd.h>
63 #endif
64 #include <sys/uio.h>
65 #include <errno.h>
66 #include <sys/un.h>
67 #include <netinet/in.h>
68 #include <netinet/tcp.h>
69 #ifdef HAVE_NETINET_TCP_FSM_H
70 #include <netinet/tcp_fsm.h>
71 #endif
72 #include <arpa/inet.h>
73 #include <fcntl.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <stdio.h>
77 #include <stdint.h>
78 #include <stdarg.h>
79 #include <stdbool.h>
80 #include <unistd.h>
81 #ifdef HAVE_GNU_LIB_NAMES_H
82 #include <gnu/lib-names.h>
83 #endif
84 #ifdef HAVE_RPC_RPC_H
85 #include <rpc/rpc.h>
86 #endif
87 #include <pthread.h>
88
89 #include "socket_wrapper.h"
90
/*
 * Log levels understood by swrap_log(), ordered from least (0) to most (3)
 * verbose. A message is emitted when its level does not exceed the value of
 * SOCKET_WRAPPER_DEBUGLEVEL.
 */
enum swrap_dbglvl_e {
        SWRAP_LOG_ERROR = 0,
        SWRAP_LOG_WARN  = 1,
        SWRAP_LOG_DEBUG = 2,
        SWRAP_LOG_TRACE = 3
};
97
/*
 * Compiler feature macros. Each one expands to the matching compiler
 * attribute/keyword when the corresponding HAVE_* check succeeded and to
 * nothing (or a no-op) otherwise.
 */

/* GCC has a printf-style format attribute check. */
#ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
#define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b)))
#else
#define PRINTF_ATTRIBUTE(a,b)
#endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */

/* Constructor attribute, used on the swrap_constructor() prototype. */
#ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
#define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor))
#else
#define CONSTRUCTOR_ATTRIBUTE
#endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */

/* Destructor attribute, used on the swrap_destructor() prototype. */
#ifdef HAVE_DESTRUCTOR_ATTRIBUTE
#define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor))
#else
#define DESTRUCTOR_ATTRIBUTE
#endif

/*
 * Marker for an intentional switch fall-through. Falls back to a no-op
 * expression when the attribute is unavailable; an earlier definition of
 * FALL_THROUGH is respected.
 */
#ifndef FALL_THROUGH
# ifdef HAVE_FALLTHROUGH_ATTRIBUTE
#  define FALL_THROUGH __attribute__ ((fallthrough))
# else /* HAVE_FALLTHROUGH_ATTRIBUTE */
#  define FALL_THROUGH ((void)0)
# endif /* HAVE_FALLTHROUGH_ATTRIBUTE */
#endif /* FALL_THROUGH */

/* Excludes a function from AddressSanitizer instrumentation. */
#ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address))
#else
#define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
#endif

/* Thread-local storage qualifier; empty when __thread is not supported. */
#ifdef HAVE_GCC_THREAD_LOCAL_STORAGE
# define SWRAP_THREAD __thread
#else
# define SWRAP_THREAD
#endif
136
/* Smaller of two values. Each argument may be evaluated more than once. */
#ifndef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif

/* Zero every byte of the object x (x is the object itself, not a pointer). */
#ifndef ZERO_STRUCT
#define ZERO_STRUCT(x) memset((void *)&(x), 0, sizeof(x))
#endif

/* Zero the object x points to; a NULL pointer is silently ignored. */
#ifndef ZERO_STRUCTP
#define ZERO_STRUCTP(x) do { \
        if ((x) != NULL) { \
                memset((void *)(x), 0, sizeof(*(x))); \
        } \
} while(0)
#endif

/* free() a non-NULL pointer and reset the variable to NULL afterwards. */
#ifndef SAFE_FREE
#define SAFE_FREE(x) do { \
        if ((x) != NULL) { \
                free(x); \
                (x) = NULL; \
        } \
} while(0)
#endif

/* Strip qualifiers from a pointer by a round trip through uintptr_t. */
#ifndef discard_const
#define discard_const(ptr) ((void *)(uintptr_t)(ptr))
#endif

/* Same as discard_const(), with the result cast to "type *". */
#ifndef discard_const_p
#define discard_const_p(type, ptr) ((type *)(discard_const(ptr)))
#endif

/* Mark a variable or parameter as intentionally unused. */
#define UNUSED(x) ((void)(x))
165
/*
 * When the platform only provides IPV6_PKTINFO, make IPV6_RECVPKTINFO
 * available as an alias for it.
 */
#ifdef IPV6_PKTINFO
# ifndef IPV6_RECVPKTINFO
#  define IPV6_RECVPKTINFO IPV6_PKTINFO
# endif /* IPV6_RECVPKTINFO */
#endif /* IPV6_PKTINFO */

/*
 * On BSD IP_PKTINFO has a different name because during
 * the time when they implemented it, there was no RFC.
 * The name for IPv6 is the same as on Linux.
 */
#ifndef IP_PKTINFO
# ifdef IP_RECVDSTADDR
#  define IP_PKTINFO IP_RECVDSTADDR
# endif
#endif
182
/*
 * Initialize a mutex through _socket_wrapper_init_mutex(), passing the
 * argument expression as a string (#m) together with the mutex itself.
 */
#define socket_wrapper_init_mutex(m) \
        _socket_wrapper_init_mutex(m, #m)

/*
 * (Re)initialize every global mutex. If any initialization returns a
 * non-zero value the process exits with status -1.
 *
 * Add new global locks here please
 */
# define SWRAP_REINIT_ALL do { \
        int ret; \
        ret = socket_wrapper_init_mutex(&sockets_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&socket_reset_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&first_free_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&sockets_si_global); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&autobind_start_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&pcap_dump_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&mtu_update_mutex); \
        if (ret != 0) exit(-1); \
} while(0)

/*
 * Acquire every global mutex, always in the same order. Keep the order in
 * sync with SWRAP_UNLOCK_ALL, which releases them in exactly the reverse
 * order.
 */
# define SWRAP_LOCK_ALL do { \
        swrap_mutex_lock(&sockets_mutex); \
        swrap_mutex_lock(&socket_reset_mutex); \
        swrap_mutex_lock(&first_free_mutex); \
        swrap_mutex_lock(&sockets_si_global); \
        swrap_mutex_lock(&autobind_start_mutex); \
        swrap_mutex_lock(&pcap_dump_mutex); \
        swrap_mutex_lock(&mtu_update_mutex); \
} while(0)

/* Release every global mutex in the reverse order of SWRAP_LOCK_ALL. */
# define SWRAP_UNLOCK_ALL do { \
        swrap_mutex_unlock(&mtu_update_mutex); \
        swrap_mutex_unlock(&pcap_dump_mutex); \
        swrap_mutex_unlock(&autobind_start_mutex); \
        swrap_mutex_unlock(&sockets_si_global); \
        swrap_mutex_unlock(&first_free_mutex); \
        swrap_mutex_unlock(&socket_reset_mutex); \
        swrap_mutex_unlock(&sockets_mutex); \
} while(0)
224
/*
 * Convert a struct socket_info pointer into a pointer to the enclosing
 * struct socket_info_container. This works because "info" is the first
 * member of the container.
 *
 * The whole expansion is parenthesized so that the result can be used
 * directly in postfix expressions such as SOCKET_INFO_CONTAINER(si)->meta;
 * without the outer parentheses the cast would apply to "(si)->meta".
 */
#define SOCKET_INFO_CONTAINER(si) \
        ((struct socket_info_container *)(si))
227
/*
 * Lock the state of a socket_info. All socket_info structures share the
 * single sockets_si_global mutex, so "si" is only checked for NULL; a NULL
 * pointer aborts the process.
 */
#define SWRAP_LOCK_SI(si) do { \
        const struct socket_info_container *swrap_sic_ = \
                SOCKET_INFO_CONTAINER(si); \
        if (swrap_sic_ == NULL) { \
                abort(); \
        } \
        swrap_mutex_lock(&sockets_si_global); \
} while(0)

/* Counterpart of SWRAP_LOCK_SI(): abort on NULL, otherwise unlock. */
#define SWRAP_UNLOCK_SI(si) do { \
        const struct socket_info_container *swrap_sic_ = \
                SOCKET_INFO_CONTAINER(si); \
        if (swrap_sic_ == NULL) { \
                abort(); \
        } \
        swrap_mutex_unlock(&sockets_si_global); \
} while(0)
245
/*
 * Portable gettimeofday() call: pass a NULL second (timezone) argument when
 * the platform's gettimeofday() takes one, otherwise use the one-argument
 * form.
 */
#if defined(HAVE_GETTIMEOFDAY_TZ) || defined(HAVE_GETTIMEOFDAY_TZ_VOID)
#define swrapGetTimeOfDay(tval) gettimeofday(tval,NULL)
#else
#define swrapGetTimeOfDay(tval) gettimeofday(tval)
#endif
251
/* we need to use a very terse format here as IRIX 6.4 silently
   truncates names to 16 chars, so if we use a longer name then we
   can't tell which port a packet came from with recvfrom()

   with this format we have 8 chars left for the directory name
*/
#define SOCKET_FORMAT "%c%02X%04X"
/*
 * Socket type characters. NOTE(review): presumably these fill the leading
 * %c of SOCKET_FORMAT - confirm against the code that builds the names.
 */
#define SOCKET_TYPE_CHAR_TCP            'T'
#define SOCKET_TYPE_CHAR_UDP            'U'
#define SOCKET_TYPE_CHAR_TCP_V6         'X'
#define SOCKET_TYPE_CHAR_UDP_V6         'Y'

/*
 * Set the packet MTU to 1500 bytes for stream sockets to make it easier to
 * format PCAP capture files (as the caller will simply continue from here).
 */
#define SOCKET_WRAPPER_MTU_DEFAULT 1500
#define SOCKET_WRAPPER_MTU_MIN     512
#define SOCKET_WRAPPER_MTU_MAX     32768

#define SOCKET_MAX_SOCKETS 1024

/*
 * Maximum number of socket_info structures that can
 * be used. Can be overridden by the environment variable
 * SOCKET_WRAPPER_MAX_SOCKETS.
 */
#define SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT 65535

/* Upper bound; also the initial value of socket_fds_max. */
#define SOCKET_WRAPPER_MAX_SOCKETS_LIMIT 262140

/* This limit is to avoid broadcast sendto() needing to stat too many
 * files.  It may be raised (with a performance cost) to up to 254
 * without changing the format above */
#define MAX_WRAPPED_INTERFACES 64
287
/*
 * A socket address together with its length. The union allows the same
 * storage to be viewed as a generic, IPv4, IPv6 (when HAVE_IPV6 is defined)
 * or unix domain address; sockaddr_storage makes it large enough for any
 * of them.
 */
struct swrap_address {
        /* NOTE(review): presumably the valid length of "sa" - confirm. */
        socklen_t sa_socklen;
        union {
                struct sockaddr s;
                struct sockaddr_in in;
#ifdef HAVE_IPV6
                struct sockaddr_in6 in6;
#endif
                struct sockaddr_un un;
                struct sockaddr_storage ss;
        } sa;
};
300
/* First free index in the socket_info array; see first_free_mutex. */
static int first_free;
302
/*
 * State kept for one socket. It is embedded as the first member of
 * struct socket_info_container.
 *
 * NOTE(review): the layout is part of a contract with
 * swrap_unix_scm_right_magic (see below), so fields must not be added,
 * removed or reordered without updating it.
 */
struct socket_info
{
        /*
         * Remember to update swrap_unix_scm_right_magic
         * on any change.
         */

        /* Arguments the socket was created with - presumably; confirm. */
        int family;
        int type;
        int protocol;

        /* State flags. */
        int bound;
        int bcast;
        int is_server;
        int connected;
        int defer_connect;
        int pktinfo;
        int tcp_nodelay;
        int listening;
        int fd_passed;

        /* The unix path so we can unlink it on close() */
        struct sockaddr_un un_addr;

        struct swrap_address bindname;
        struct swrap_address myname;
        struct swrap_address peername;

        /* Packet counters (sent / received). */
        struct {
                unsigned long pck_snd;
                unsigned long pck_rcv;
        } io;
};
335
/* Bookkeeping stored next to each socket_info inside its container. */
struct socket_info_meta
{
        /* Reference count of the entry. */
        unsigned int refcount;
        /* NOTE(review): presumably the index of the next free entry - confirm. */
        int next_free;
        /*
         * As long as we don't use shared memory
         * for the sockets array, we use
         * sockets_si_global as a single mutex.
         *
         * pthread_mutex_t mutex;
         */
};
348
/*
 * One element of the sockets array: the socket state plus its metadata.
 * "info" must stay the first member, SOCKET_INFO_CONTAINER() casts a
 * struct socket_info pointer to a pointer to this structure.
 */
struct socket_info_container
{
        struct socket_info info;
        struct socket_info_meta meta;
};
354
/* The array of socket_info containers; see sockets_mutex. */
static struct socket_info_container *sockets;

/* NOTE(review): presumably the number of usable socket_info entries - confirm. */
static size_t socket_info_max = 0;

/*
 * Allocate the socket array always on the limit value. We want it to be
 * at least bigger than the default so if we reach the limit we can
 * still deal with duplicate fds pointing to the same socket_info.
 */
static size_t socket_fds_max = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;

/* Hash table to map fds to corresponding socket_info index */
static int *socket_fds_idx;

/* Mutex for synchronizing port selection during swrap_auto_bind() */
static pthread_mutex_t autobind_start_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mutex to guard the initialization of array of socket_info structures */
static pthread_mutex_t sockets_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mutex to guard the socket reset in swrap_remove_wrapper() */
static pthread_mutex_t socket_reset_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mutex to synchronize access to first free index in socket_info array */
static pthread_mutex_t first_free_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Mutex to synchronize access to socket_info structures.
 * We use a single global mutex in order to avoid leaking
 * ~ 38M copy on write memory per fork.
 * max_sockets=65535 * sizeof(struct socket_info_container)=592 = 38796720
 */
static pthread_mutex_t sockets_si_global = PTHREAD_MUTEX_INITIALIZER;

/* Mutex to synchronize access to packet capture dump file */
static pthread_mutex_t pcap_dump_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Mutex for synchronizing mtu value fetch */
static pthread_mutex_t mtu_update_mutex = PTHREAD_MUTEX_INITIALIZER;
394
/* Function prototypes */

/*
 * Library constructor and destructor. They are registered through the
 * compiler attributes when available and through "#pragma init/fini"
 * otherwise.
 */
#if ! defined(HAVE_CONSTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_INIT)
/* xlC and other oldschool compilers support (only) this */
#pragma init (swrap_constructor)
#endif
void swrap_constructor(void) CONSTRUCTOR_ATTRIBUTE;
#if ! defined(HAVE_DESTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_FINI)
#pragma fini (swrap_destructor)
#endif
void swrap_destructor(void) DESTRUCTOR_ATTRIBUTE;
406
#ifndef HAVE_GETPROGNAME
/*
 * Fallback for platforms without getprogname().
 *
 * Returns the program name from program_invocation_short_name or
 * getexecname(), whichever is available, or NULL if there is neither.
 */
static const char *getprogname(void)
{
        const char *name = NULL;

#if defined(HAVE_PROGRAM_INVOCATION_SHORT_NAME)
        name = program_invocation_short_name;
#elif defined(HAVE_GETEXECNAME)
        name = getexecname();
#endif /* HAVE_PROGRAM_INVOCATION_SHORT_NAME */

        return name;
}
#endif /* HAVE_GETPROGNAME */
419
420 static void swrap_log(enum swrap_dbglvl_e dbglvl, const char *func, const char *format, ...) PRINTF_ATTRIBUTE(3, 4);
421 # define SWRAP_LOG(dbglvl, ...) swrap_log((dbglvl), __func__, __VA_ARGS__)
422
423 static void swrap_log(enum swrap_dbglvl_e dbglvl,
424                       const char *func,
425                       const char *format, ...)
426 {
427         char buffer[1024];
428         va_list va;
429         const char *d;
430         unsigned int lvl = 0;
431         const char *prefix = "SWRAP";
432         const char *progname = getprogname();
433
434         d = getenv("SOCKET_WRAPPER_DEBUGLEVEL");
435         if (d != NULL) {
436                 lvl = atoi(d);
437         }
438
439         if (lvl < dbglvl) {
440                 return;
441         }
442
443         va_start(va, format);
444         vsnprintf(buffer, sizeof(buffer), format, va);
445         va_end(va);
446
447         switch (dbglvl) {
448                 case SWRAP_LOG_ERROR:
449                         prefix = "SWRAP_ERROR";
450                         break;
451                 case SWRAP_LOG_WARN:
452                         prefix = "SWRAP_WARN";
453                         break;
454                 case SWRAP_LOG_DEBUG:
455                         prefix = "SWRAP_DEBUG";
456                         break;
457                 case SWRAP_LOG_TRACE:
458                         prefix = "SWRAP_TRACE";
459                         break;
460         }
461
462         if (progname == NULL) {
463                 progname = "<unknown>";
464         }
465
466         fprintf(stderr,
467                 "%s[%s (%u)] - %s: %s\n",
468                 prefix,
469                 progname,
470                 (unsigned int)getpid(),
471                 func,
472                 buffer);
473 }
474
475 /*********************************************************
476  * SWRAP LOADING LIBC FUNCTIONS
477  *********************************************************/
478
479 #include <dlfcn.h>
480
/*
 * Function pointer types for the real implementations that are looked up
 * with dlsym(). There is one type per wrapped function, named
 * __libc_<function>.
 *
 * Each type has to be compatible with the prototype of the function it
 * points to, otherwise calling through the pointer is undefined. In
 * particular the data transfer functions (read, recv, recvfrom, recvmsg,
 * send, sendmsg, sendto) return ssize_t, not int, and recvmsg() takes a
 * non-const struct msghdr because it writes to it.
 */
#ifdef HAVE_ACCEPT4
typedef int (*__libc_accept4)(int sockfd,
                              struct sockaddr *addr,
                              socklen_t *addrlen,
                              int flags);
#else
typedef int (*__libc_accept)(int sockfd,
                             struct sockaddr *addr,
                             socklen_t *addrlen);
#endif
typedef int (*__libc_bind)(int sockfd,
                           const struct sockaddr *addr,
                           socklen_t addrlen);
typedef int (*__libc_close)(int fd);
#ifdef HAVE___CLOSE_NOCANCEL
typedef int (*__libc___close_nocancel)(int fd);
#endif
typedef int (*__libc_connect)(int sockfd,
                              const struct sockaddr *addr,
                              socklen_t addrlen);
typedef int (*__libc_dup)(int fd);
typedef int (*__libc_dup2)(int oldfd, int newfd);
typedef int (*__libc_fcntl)(int fd, int cmd, ...);
typedef FILE *(*__libc_fopen)(const char *name, const char *mode);
#ifdef HAVE_FOPEN64
typedef FILE *(*__libc_fopen64)(const char *name, const char *mode);
#endif
#ifdef HAVE_EVENTFD
typedef int (*__libc_eventfd)(int count, int flags);
#endif
typedef int (*__libc_getpeername)(int sockfd,
                                  struct sockaddr *addr,
                                  socklen_t *addrlen);
typedef int (*__libc_getsockname)(int sockfd,
                                  struct sockaddr *addr,
                                  socklen_t *addrlen);
typedef int (*__libc_getsockopt)(int sockfd,
                               int level,
                               int optname,
                               void *optval,
                               socklen_t *optlen);
typedef int (*__libc_ioctl)(int d, unsigned long int request, ...);
typedef int (*__libc_listen)(int sockfd, int backlog);
typedef int (*__libc_open)(const char *pathname, int flags, ...);
#ifdef HAVE_OPEN64
typedef int (*__libc_open64)(const char *pathname, int flags, ...);
#endif /* HAVE_OPEN64 */
typedef int (*__libc_openat)(int dirfd, const char *path, int flags, ...);
typedef int (*__libc_pipe)(int pipefd[2]);
typedef ssize_t (*__libc_read)(int fd, void *buf, size_t count);
typedef ssize_t (*__libc_readv)(int fd, const struct iovec *iov, int iovcnt);
typedef ssize_t (*__libc_recv)(int sockfd, void *buf, size_t len, int flags);
typedef ssize_t (*__libc_recvfrom)(int sockfd,
                                   void *buf,
                                   size_t len,
                                   int flags,
                                   struct sockaddr *src_addr,
                                   socklen_t *addrlen);
typedef ssize_t (*__libc_recvmsg)(int sockfd, struct msghdr *msg, int flags);
typedef ssize_t (*__libc_send)(int sockfd, const void *buf, size_t len, int flags);
typedef ssize_t (*__libc_sendmsg)(int sockfd, const struct msghdr *msg, int flags);
typedef ssize_t (*__libc_sendto)(int sockfd,
                                 const void *buf,
                                 size_t len,
                                 int flags,
                                 const  struct sockaddr *dst_addr,
                                 socklen_t addrlen);
typedef int (*__libc_setsockopt)(int sockfd,
                               int level,
                               int optname,
                               const void *optval,
                               socklen_t optlen);
#ifdef HAVE_SIGNALFD
typedef int (*__libc_signalfd)(int fd, const sigset_t *mask, int flags);
#endif
typedef int (*__libc_socket)(int domain, int type, int protocol);
typedef int (*__libc_socketpair)(int domain, int type, int protocol, int sv[2]);
#ifdef HAVE_TIMERFD_CREATE
typedef int (*__libc_timerfd_create)(int clockid, int flags);
#endif
typedef ssize_t (*__libc_write)(int fd, const void *buf, size_t count);
typedef ssize_t (*__libc_writev)(int fd, const struct iovec *iov, int iovcnt);
563
/*
 * Declare one member of struct swrap_libc_symbols, named _libc_<i>.
 *
 * The member is a union of the typed function pointer (f) and a plain
 * object pointer (obj): the symbol address is stored through "obj" and the
 * function is called through "f".
 */
#define SWRAP_SYMBOL_ENTRY(i) \
        union { \
                __libc_##i f; \
                void *obj; \
        } _libc_##i
569
/*
 * Table of the bound symbols, one SWRAP_SYMBOL_ENTRY per wrapped function.
 * Entries for optional functions only exist when the matching HAVE_* macro
 * is defined, mirroring the __libc_* typedefs.
 */
struct swrap_libc_symbols {
#ifdef HAVE_ACCEPT4
        SWRAP_SYMBOL_ENTRY(accept4);
#else
        SWRAP_SYMBOL_ENTRY(accept);
#endif
        SWRAP_SYMBOL_ENTRY(bind);
        SWRAP_SYMBOL_ENTRY(close);
#ifdef HAVE___CLOSE_NOCANCEL
        SWRAP_SYMBOL_ENTRY(__close_nocancel);
#endif
        SWRAP_SYMBOL_ENTRY(connect);
        SWRAP_SYMBOL_ENTRY(dup);
        SWRAP_SYMBOL_ENTRY(dup2);
        SWRAP_SYMBOL_ENTRY(fcntl);
        SWRAP_SYMBOL_ENTRY(fopen);
#ifdef HAVE_FOPEN64
        SWRAP_SYMBOL_ENTRY(fopen64);
#endif
#ifdef HAVE_EVENTFD
        SWRAP_SYMBOL_ENTRY(eventfd);
#endif
        SWRAP_SYMBOL_ENTRY(getpeername);
        SWRAP_SYMBOL_ENTRY(getsockname);
        SWRAP_SYMBOL_ENTRY(getsockopt);
        SWRAP_SYMBOL_ENTRY(ioctl);
        SWRAP_SYMBOL_ENTRY(listen);
        SWRAP_SYMBOL_ENTRY(open);
#ifdef HAVE_OPEN64
        SWRAP_SYMBOL_ENTRY(open64);
#endif
        SWRAP_SYMBOL_ENTRY(openat);
        SWRAP_SYMBOL_ENTRY(pipe);
        SWRAP_SYMBOL_ENTRY(read);
        SWRAP_SYMBOL_ENTRY(readv);
        SWRAP_SYMBOL_ENTRY(recv);
        SWRAP_SYMBOL_ENTRY(recvfrom);
        SWRAP_SYMBOL_ENTRY(recvmsg);
        SWRAP_SYMBOL_ENTRY(send);
        SWRAP_SYMBOL_ENTRY(sendmsg);
        SWRAP_SYMBOL_ENTRY(sendto);
        SWRAP_SYMBOL_ENTRY(setsockopt);
#ifdef HAVE_SIGNALFD
        SWRAP_SYMBOL_ENTRY(signalfd);
#endif
        SWRAP_SYMBOL_ENTRY(socket);
        SWRAP_SYMBOL_ENTRY(socketpair);
#ifdef HAVE_TIMERFD_CREATE
        SWRAP_SYMBOL_ENTRY(timerfd_create);
#endif
        SWRAP_SYMBOL_ENTRY(write);
        SWRAP_SYMBOL_ENTRY(writev);
};
623
/*
 * Global loader state: the cached dlopen() handles for libc and libsocket
 * (filled in by swrap_load_lib_handle()) and the table of bound symbols.
 */
struct swrap {
        struct {
                void *handle;
                void *socket_handle;
                struct swrap_libc_symbols symbols;
        } libc;
};

/* The single instance of the loader state. */
static struct swrap swrap;
633
634 /* prototypes */
635 static char *socket_wrapper_dir(void);
636
637 #define LIBC_NAME "libc.so"
638
/* Libraries a symbol can be looked up in. */
enum swrap_lib {
        SWRAP_LIBC      = 0,
        SWRAP_LIBSOCKET = 1,
};
643
644 static const char *swrap_str_lib(enum swrap_lib lib)
645 {
646         switch (lib) {
647         case SWRAP_LIBC:
648                 return "libc";
649         case SWRAP_LIBSOCKET:
650                 return "libsocket";
651         }
652
653         /* Compiler would warn us about unhandled enum value if we get here */
654         return "unknown";
655 }
656
657 static void *swrap_load_lib_handle(enum swrap_lib lib)
658 {
659         int flags = RTLD_LAZY;
660         void *handle = NULL;
661         int i;
662
663 #ifdef RTLD_DEEPBIND
664         const char *env_preload = getenv("LD_PRELOAD");
665         const char *env_deepbind = getenv("SOCKET_WRAPPER_DISABLE_DEEPBIND");
666         bool enable_deepbind = true;
667
668         /* Don't do a deepbind if we run with libasan */
669         if (env_preload != NULL && strlen(env_preload) < 1024) {
670                 const char *p = strstr(env_preload, "libasan.so");
671                 if (p != NULL) {
672                         enable_deepbind = false;
673                 }
674         }
675
676         if (env_deepbind != NULL && strlen(env_deepbind) >= 1) {
677                 enable_deepbind = false;
678         }
679
680         if (enable_deepbind) {
681                 flags |= RTLD_DEEPBIND;
682         }
683 #endif
684
685         switch (lib) {
686         case SWRAP_LIBSOCKET:
687 #ifdef HAVE_LIBSOCKET
688                 handle = swrap.libc.socket_handle;
689                 if (handle == NULL) {
690                         for (i = 10; i >= 0; i--) {
691                                 char soname[256] = {0};
692
693                                 snprintf(soname, sizeof(soname), "libsocket.so.%d", i);
694                                 handle = dlopen(soname, flags);
695                                 if (handle != NULL) {
696                                         break;
697                                 }
698                         }
699
700                         swrap.libc.socket_handle = handle;
701                 }
702                 break;
703 #endif
704         case SWRAP_LIBC:
705                 handle = swrap.libc.handle;
706 #ifdef LIBC_SO
707                 if (handle == NULL) {
708                         handle = dlopen(LIBC_SO, flags);
709
710                         swrap.libc.handle = handle;
711                 }
712 #endif
713                 if (handle == NULL) {
714                         for (i = 10; i >= 0; i--) {
715                                 char soname[256] = {0};
716
717                                 snprintf(soname, sizeof(soname), "libc.so.%d", i);
718                                 handle = dlopen(soname, flags);
719                                 if (handle != NULL) {
720                                         break;
721                                 }
722                         }
723
724                         swrap.libc.handle = handle;
725                 }
726                 break;
727         }
728
729         if (handle == NULL) {
730 #ifdef RTLD_NEXT
731                 handle = swrap.libc.handle = swrap.libc.socket_handle = RTLD_NEXT;
732 #else
733                 SWRAP_LOG(SWRAP_LOG_ERROR,
734                           "Failed to dlopen library: %s",
735                           dlerror());
736                 exit(-1);
737 #endif
738         }
739
740         return handle;
741 }
742
743 static void *_swrap_bind_symbol(enum swrap_lib lib, const char *fn_name)
744 {
745         void *handle;
746         void *func;
747
748         handle = swrap_load_lib_handle(lib);
749
750         func = dlsym(handle, fn_name);
751         if (func == NULL) {
752                 SWRAP_LOG(SWRAP_LOG_ERROR,
753                           "Failed to find %s: %s",
754                           fn_name,
755                           dlerror());
756                 exit(-1);
757         }
758
759         SWRAP_LOG(SWRAP_LOG_TRACE,
760                   "Loaded %s from %s",
761                   fn_name,
762                   swrap_str_lib(lib));
763
764         return func;
765 }
766
767 #define swrap_mutex_lock(m) _swrap_mutex_lock(m, #m, __func__, __LINE__)
768 static void _swrap_mutex_lock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
769 {
770         int ret;
771
772         ret = pthread_mutex_lock(mutex);
773         if (ret != 0) {
774                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't lock pthread mutex(%s) - %s",
775                           getpid(), getppid(), caller, line, name, strerror(ret));
776                 abort();
777         }
778 }
779
780 #define swrap_mutex_unlock(m) _swrap_mutex_unlock(m, #m, __func__, __LINE__)
781 static void _swrap_mutex_unlock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
782 {
783         int ret;
784
785         ret = pthread_mutex_unlock(mutex);
786         if (ret != 0) {
787                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't unlock pthread mutex(%s) - %s",
788                           getpid(), getppid(), caller, line, name, strerror(ret));
789                 abort();
790         }
791 }
792
/*
 * These macros have a thread race condition on purpose!
 *
 * This is an optimization to avoid locking each time we check if the symbol is
 * bound.
 *
 * NOTE(review): as written the macros bind unconditionally and contain no
 * "is bound" check themselves; any serialization presumably happens in the
 * caller (swrap_bind_symbol_all()) - confirm and update this comment.
 *
 * _swrap_bind_symbol_generic(lib, sym_name) resolves sym_name in lib and
 * stores the address in swrap.libc.symbols._libc_<sym_name>.obj.
 *
 * The expansion ends without a semicolon, the caller supplies it. That
 * makes the macro behave like a single statement, e.g. as the unbraced body
 * of an if that is followed by an else.
 */
#define _swrap_bind_symbol_generic(lib, sym_name) do { \
        swrap.libc.symbols._libc_##sym_name.obj = \
                _swrap_bind_symbol(lib, #sym_name); \
} while(0)

/* Bind sym_name from libc. */
#define swrap_bind_symbol_libc(sym_name) \
        _swrap_bind_symbol_generic(SWRAP_LIBC, sym_name)

/* Bind sym_name from libsocket. */
#define swrap_bind_symbol_libsocket(sym_name) \
        _swrap_bind_symbol_generic(SWRAP_LIBSOCKET, sym_name)
809
810 static void swrap_bind_symbol_all(void);
811
812 /****************************************************************************
813  *                               IMPORTANT
814  ****************************************************************************
815  *
 * Functions especially from libc need to be loaded individually, you can't
 * load all at once or gdb will segfault at startup. The same applies to
 * valgrind and probably has something to do with the linker.  So we need
 * to load each function at the point it is called the first time.
820  *
821  ****************************************************************************/
822
823 #ifdef HAVE_ACCEPT4
824 static int libc_accept4(int sockfd,
825                         struct sockaddr *addr,
826                         socklen_t *addrlen,
827                         int flags)
828 {
829         swrap_bind_symbol_all();
830
831         return swrap.libc.symbols._libc_accept4.f(sockfd, addr, addrlen, flags);
832 }
833
834 #else /* HAVE_ACCEPT4 */
835
836 static int libc_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
837 {
838         swrap_bind_symbol_all();
839
840         return swrap.libc.symbols._libc_accept.f(sockfd, addr, addrlen);
841 }
842 #endif /* HAVE_ACCEPT4 */
843
844 static int libc_bind(int sockfd,
845                      const struct sockaddr *addr,
846                      socklen_t addrlen)
847 {
848         swrap_bind_symbol_all();
849
850         return swrap.libc.symbols._libc_bind.f(sockfd, addr, addrlen);
851 }
852
853 static int libc_close(int fd)
854 {
855         swrap_bind_symbol_all();
856
857         return swrap.libc.symbols._libc_close.f(fd);
858 }
859
#ifdef HAVE___CLOSE_NOCANCEL
/* Forward to the real __close_nocancel() once the symbols have been bound. */
static int libc___close_nocancel(int fd)
{
        int rc;

        swrap_bind_symbol_all();

        rc = swrap.libc.symbols._libc___close_nocancel.f(fd);

        return rc;
}
#endif /* HAVE___CLOSE_NOCANCEL */
868
869 static int libc_connect(int sockfd,
870                         const struct sockaddr *addr,
871                         socklen_t addrlen)
872 {
873         swrap_bind_symbol_all();
874
875         return swrap.libc.symbols._libc_connect.f(sockfd, addr, addrlen);
876 }
877
878 static int libc_dup(int fd)
879 {
880         swrap_bind_symbol_all();
881
882         return swrap.libc.symbols._libc_dup.f(fd);
883 }
884
885 static int libc_dup2(int oldfd, int newfd)
886 {
887         swrap_bind_symbol_all();
888
889         return swrap.libc.symbols._libc_dup2.f(oldfd, newfd);
890 }
891
#ifdef HAVE_EVENTFD
/* Forward to the real eventfd() once the libc symbols have been bound. */
static int libc_eventfd(int count, int flags)
{
        int event_fd;

        swrap_bind_symbol_all();

        event_fd = swrap.libc.symbols._libc_eventfd.f(count, flags);

        return event_fd;
}
#endif
900
901 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
902 static int libc_vfcntl(int fd, int cmd, va_list ap)
903 {
904         void *arg;
905         int rc;
906
907         swrap_bind_symbol_all();
908
909         arg = va_arg(ap, void *);
910
911         rc = swrap.libc.symbols._libc_fcntl.f(fd, cmd, arg);
912
913         return rc;
914 }
915
916 static int libc_getpeername(int sockfd,
917                             struct sockaddr *addr,
918                             socklen_t *addrlen)
919 {
920         swrap_bind_symbol_all();
921
922         return swrap.libc.symbols._libc_getpeername.f(sockfd, addr, addrlen);
923 }
924
925 static int libc_getsockname(int sockfd,
926                             struct sockaddr *addr,
927                             socklen_t *addrlen)
928 {
929         swrap_bind_symbol_all();
930
931         return swrap.libc.symbols._libc_getsockname.f(sockfd, addr, addrlen);
932 }
933
934 static int libc_getsockopt(int sockfd,
935                            int level,
936                            int optname,
937                            void *optval,
938                            socklen_t *optlen)
939 {
940         swrap_bind_symbol_all();
941
942         return swrap.libc.symbols._libc_getsockopt.f(sockfd,
943                                                      level,
944                                                      optname,
945                                                      optval,
946                                                      optlen);
947 }
948
949 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
950 static int libc_vioctl(int d, unsigned long int request, va_list ap)
951 {
952         void *arg;
953         int rc;
954
955         swrap_bind_symbol_all();
956
957         arg = va_arg(ap, void *);
958
959         rc = swrap.libc.symbols._libc_ioctl.f(d, request, arg);
960
961         return rc;
962 }
963
964 static int libc_listen(int sockfd, int backlog)
965 {
966         swrap_bind_symbol_all();
967
968         return swrap.libc.symbols._libc_listen.f(sockfd, backlog);
969 }
970
971 static FILE *libc_fopen(const char *name, const char *mode)
972 {
973         swrap_bind_symbol_all();
974
975         return swrap.libc.symbols._libc_fopen.f(name, mode);
976 }
977
978 #ifdef HAVE_FOPEN64
979 static FILE *libc_fopen64(const char *name, const char *mode)
980 {
981         swrap_bind_symbol_all();
982
983         return swrap.libc.symbols._libc_fopen64.f(name, mode);
984 }
985 #endif /* HAVE_FOPEN64 */
986
/*
 * Add O_LARGEFILE to *flags when size_t is 32 bits wide and the platform
 * defines O_LARGEFILE. The flag is left out when O_PATH is requested.
 * On every other platform *flags is left untouched.
 */
static void swrap_inject_o_largefile(int *flags)
{
#if SIZE_MAX == 0xffffffffUL && defined(O_LARGEFILE)
#if defined(O_PATH)
	if ((*flags & O_PATH) != 0) {
		return;
	}
#endif
	*flags |= O_LARGEFILE;
#else
	(void)*flags; /* nothing to inject on this platform */
#endif
}
999
1000 static int libc_vopen(const char *pathname, int flags, va_list ap)
1001 {
1002         int mode = 0;
1003         int fd;
1004
1005         swrap_bind_symbol_all();
1006
1007         swrap_inject_o_largefile(&flags);
1008
1009         if (flags & O_CREAT) {
1010                 mode = va_arg(ap, int);
1011         }
1012         fd = swrap.libc.symbols._libc_open.f(pathname, flags, (mode_t)mode);
1013
1014         return fd;
1015 }
1016
/*
 * Variadic front end for libc_vopen(): packs the optional arguments
 * into a va_list and hands them on.
 */
static int libc_open(const char *pathname, int flags, ...)
{
	va_list args;
	int rc;

	va_start(args, flags);
	rc = libc_vopen(pathname, flags, args);
	va_end(args);

	return rc;
}
1028
1029 #ifdef HAVE_OPEN64
1030 static int libc_vopen64(const char *pathname, int flags, va_list ap)
1031 {
1032         int mode = 0;
1033         int fd;
1034
1035         swrap_bind_symbol_all();
1036
1037         swrap_inject_o_largefile(&flags);
1038
1039         if (flags & O_CREAT) {
1040                 mode = va_arg(ap, int);
1041         }
1042         fd = swrap.libc.symbols._libc_open64.f(pathname, flags, (mode_t)mode);
1043
1044         return fd;
1045 }
1046 #endif /* HAVE_OPEN64 */
1047
1048 static int libc_vopenat(int dirfd, const char *path, int flags, va_list ap)
1049 {
1050         int mode = 0;
1051         int fd;
1052
1053         swrap_bind_symbol_all();
1054
1055         swrap_inject_o_largefile(&flags);
1056
1057         if (flags & O_CREAT) {
1058                 mode = va_arg(ap, int);
1059         }
1060         fd = swrap.libc.symbols._libc_openat.f(dirfd,
1061                                                path,
1062                                                flags,
1063                                                (mode_t)mode);
1064
1065         return fd;
1066 }
1067
1068 #if 0
/*
 * Variadic front end for libc_vopenat(): packs the optional arguments
 * into a va_list and hands them on. Currently compiled out by the
 * surrounding '#if 0'.
 */
static int libc_openat(int dirfd, const char *path, int flags, ...)
{
	va_list args;
	int rc;

	va_start(args, flags);
	rc = libc_vopenat(dirfd, path, flags, args);
	va_end(args);

	return rc;
}
1080 #endif
1081
1082 static int libc_pipe(int pipefd[2])
1083 {
1084         swrap_bind_symbol_all();
1085
1086         return swrap.libc.symbols._libc_pipe.f(pipefd);
1087 }
1088
1089 static int libc_read(int fd, void *buf, size_t count)
1090 {
1091         swrap_bind_symbol_all();
1092
1093         return swrap.libc.symbols._libc_read.f(fd, buf, count);
1094 }
1095
1096 static ssize_t libc_readv(int fd, const struct iovec *iov, int iovcnt)
1097 {
1098         swrap_bind_symbol_all();
1099
1100         return swrap.libc.symbols._libc_readv.f(fd, iov, iovcnt);
1101 }
1102
1103 static int libc_recv(int sockfd, void *buf, size_t len, int flags)
1104 {
1105         swrap_bind_symbol_all();
1106
1107         return swrap.libc.symbols._libc_recv.f(sockfd, buf, len, flags);
1108 }
1109
1110 static int libc_recvfrom(int sockfd,
1111                          void *buf,
1112                          size_t len,
1113                          int flags,
1114                          struct sockaddr *src_addr,
1115                          socklen_t *addrlen)
1116 {
1117         swrap_bind_symbol_all();
1118
1119         return swrap.libc.symbols._libc_recvfrom.f(sockfd,
1120                                                    buf,
1121                                                    len,
1122                                                    flags,
1123                                                    src_addr,
1124                                                    addrlen);
1125 }
1126
1127 static int libc_recvmsg(int sockfd, struct msghdr *msg, int flags)
1128 {
1129         swrap_bind_symbol_all();
1130
1131         return swrap.libc.symbols._libc_recvmsg.f(sockfd, msg, flags);
1132 }
1133
1134 static int libc_send(int sockfd, const void *buf, size_t len, int flags)
1135 {
1136         swrap_bind_symbol_all();
1137
1138         return swrap.libc.symbols._libc_send.f(sockfd, buf, len, flags);
1139 }
1140
1141 static int libc_sendmsg(int sockfd, const struct msghdr *msg, int flags)
1142 {
1143         swrap_bind_symbol_all();
1144
1145         return swrap.libc.symbols._libc_sendmsg.f(sockfd, msg, flags);
1146 }
1147
1148 static int libc_sendto(int sockfd,
1149                        const void *buf,
1150                        size_t len,
1151                        int flags,
1152                        const  struct sockaddr *dst_addr,
1153                        socklen_t addrlen)
1154 {
1155         swrap_bind_symbol_all();
1156
1157         return swrap.libc.symbols._libc_sendto.f(sockfd,
1158                                                  buf,
1159                                                  len,
1160                                                  flags,
1161                                                  dst_addr,
1162                                                  addrlen);
1163 }
1164
1165 static int libc_setsockopt(int sockfd,
1166                            int level,
1167                            int optname,
1168                            const void *optval,
1169                            socklen_t optlen)
1170 {
1171         swrap_bind_symbol_all();
1172
1173         return swrap.libc.symbols._libc_setsockopt.f(sockfd,
1174                                                      level,
1175                                                      optname,
1176                                                      optval,
1177                                                      optlen);
1178 }
1179
1180 #ifdef HAVE_SIGNALFD
1181 static int libc_signalfd(int fd, const sigset_t *mask, int flags)
1182 {
1183         swrap_bind_symbol_all();
1184
1185         return swrap.libc.symbols._libc_signalfd.f(fd, mask, flags);
1186 }
1187 #endif
1188
1189 static int libc_socket(int domain, int type, int protocol)
1190 {
1191         swrap_bind_symbol_all();
1192
1193         return swrap.libc.symbols._libc_socket.f(domain, type, protocol);
1194 }
1195
1196 static int libc_socketpair(int domain, int type, int protocol, int sv[2])
1197 {
1198         swrap_bind_symbol_all();
1199
1200         return swrap.libc.symbols._libc_socketpair.f(domain, type, protocol, sv);
1201 }
1202
1203 #ifdef HAVE_TIMERFD_CREATE
1204 static int libc_timerfd_create(int clockid, int flags)
1205 {
1206         swrap_bind_symbol_all();
1207
1208         return swrap.libc.symbols._libc_timerfd_create.f(clockid, flags);
1209 }
1210 #endif
1211
1212 static ssize_t libc_write(int fd, const void *buf, size_t count)
1213 {
1214         swrap_bind_symbol_all();
1215
1216         return swrap.libc.symbols._libc_write.f(fd, buf, count);
1217 }
1218
1219 static ssize_t libc_writev(int fd, const struct iovec *iov, int iovcnt)
1220 {
1221         swrap_bind_symbol_all();
1222
1223         return swrap.libc.symbols._libc_writev.f(fd, iov, iovcnt);
1224 }
1225
1226 /* DO NOT call this function during library initialization! */
1227 static void __swrap_bind_symbol_all_once(void)
1228 {
1229 #ifdef HAVE_ACCEPT4
1230         swrap_bind_symbol_libsocket(accept4);
1231 #else
1232         swrap_bind_symbol_libsocket(accept);
1233 #endif
1234         swrap_bind_symbol_libsocket(bind);
1235         swrap_bind_symbol_libc(close);
1236 #ifdef HAVE___CLOSE_NOCANCEL
1237         swrap_bind_symbol_libc(__close_nocancel);
1238 #endif
1239         swrap_bind_symbol_libsocket(connect);
1240         swrap_bind_symbol_libc(dup);
1241         swrap_bind_symbol_libc(dup2);
1242         swrap_bind_symbol_libc(fcntl);
1243         swrap_bind_symbol_libc(fopen);
1244 #ifdef HAVE_FOPEN64
1245         swrap_bind_symbol_libc(fopen64);
1246 #endif
1247 #ifdef HAVE_EVENTFD
1248         swrap_bind_symbol_libc(eventfd);
1249 #endif
1250         swrap_bind_symbol_libsocket(getpeername);
1251         swrap_bind_symbol_libsocket(getsockname);
1252         swrap_bind_symbol_libsocket(getsockopt);
1253         swrap_bind_symbol_libc(ioctl);
1254         swrap_bind_symbol_libsocket(listen);
1255         swrap_bind_symbol_libc(open);
1256 #ifdef HAVE_OPEN64
1257         swrap_bind_symbol_libc(open64);
1258 #endif
1259         swrap_bind_symbol_libc(openat);
1260         swrap_bind_symbol_libsocket(pipe);
1261         swrap_bind_symbol_libc(read);
1262         swrap_bind_symbol_libsocket(readv);
1263         swrap_bind_symbol_libsocket(recv);
1264         swrap_bind_symbol_libsocket(recvfrom);
1265         swrap_bind_symbol_libsocket(recvmsg);
1266         swrap_bind_symbol_libsocket(send);
1267         swrap_bind_symbol_libsocket(sendmsg);
1268         swrap_bind_symbol_libsocket(sendto);
1269         swrap_bind_symbol_libsocket(setsockopt);
1270 #ifdef HAVE_SIGNALFD
1271         swrap_bind_symbol_libsocket(signalfd);
1272 #endif
1273         swrap_bind_symbol_libsocket(socket);
1274         swrap_bind_symbol_libsocket(socketpair);
1275 #ifdef HAVE_TIMERFD_CREATE
1276         swrap_bind_symbol_libc(timerfd_create);
1277 #endif
1278         swrap_bind_symbol_libc(write);
1279         swrap_bind_symbol_libsocket(writev);
1280 }
1281
1282 static void swrap_bind_symbol_all(void)
1283 {
1284         static pthread_once_t all_symbol_binding_once = PTHREAD_ONCE_INIT;
1285
1286         pthread_once(&all_symbol_binding_once, __swrap_bind_symbol_all_once);
1287 }
1288
1289 /*********************************************************
1290  * SWRAP HELPER FUNCTIONS
1291  *********************************************************/
1292
1293 /*
1294  * We return 127.0.0.0 (default) or 10.53.57.0.
1295  *
1296  * This can be controlled by:
1297  * SOCKET_WRAPPER_IPV4_NETWORK=127.0.0.0 (default)
1298  * or
1299  * SOCKET_WRAPPER_IPV4_NETWORK=10.53.57.0
1300  */
1301 static in_addr_t swrap_ipv4_net(void)
1302 {
1303         static int initialized;
1304         static in_addr_t hv;
1305         const char *net_str = NULL;
1306         struct in_addr nv;
1307         int ret;
1308
1309         if (initialized) {
1310                 return hv;
1311         }
1312         initialized = 1;
1313
1314         net_str = getenv("SOCKET_WRAPPER_IPV4_NETWORK");
1315         if (net_str == NULL) {
1316                 net_str = "127.0.0.0";
1317         }
1318
1319         ret = inet_pton(AF_INET, net_str, &nv);
1320         if (ret <= 0) {
1321                 SWRAP_LOG(SWRAP_LOG_ERROR,
1322                           "INVALID IPv4 Network [%s]",
1323                           net_str);
1324                 abort();
1325         }
1326
1327         hv = ntohl(nv.s_addr);
1328
1329         switch (hv) {
1330         case 0x7f000000:
1331                 /* 127.0.0.0 */
1332                 break;
1333         case 0x0a353900:
1334                 /* 10.53.57.0 */
1335                 break;
1336         default:
1337                 SWRAP_LOG(SWRAP_LOG_ERROR,
1338                           "INVALID IPv4 Network [%s][0x%x] should be "
1339                           "127.0.0.0 or 10.53.57.0",
1340                           net_str, (unsigned)hv);
1341                 abort();
1342         }
1343
1344         return hv;
1345 }
1346
/*
 * Return the broadcast address of the wrapped network in host byte
 * order, i.e. the network from swrap_ipv4_net() with all IN_CLASSA_HOST
 * bits set: 127.255.255.255 or 10.255.255.255.
 */
static in_addr_t swrap_ipv4_bcast(void)
{
	in_addr_t bcast = swrap_ipv4_net();

	bcast |= IN_CLASSA_HOST;

	return bcast;
}
1359
1360 /*
1361  * This returns 127.0.0.${iface} or 10.53.57.${iface}
1362  */
1363 static in_addr_t swrap_ipv4_iface(unsigned int iface)
1364 {
1365         in_addr_t hv;
1366
1367         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1368                 SWRAP_LOG(SWRAP_LOG_ERROR,
1369                           "swrap_ipv4_iface(%u) invalid!",
1370                           iface);
1371                 abort();
1372                 return -1;
1373         }
1374
1375         hv = swrap_ipv4_net();
1376         hv |= iface;
1377
1378         return hv;
1379 }
1380
1381 #ifdef HAVE_IPV6
/*
 * Return a pointer to the base IPv6 address FD00::5357:5F00.
 * (Wrapped addresses have the form FD00::5357:5FXX.)
 *
 * The address is parsed on the first call and kept in a function-local
 * static; the process aborts if parsing fails.
 */
static const struct in6_addr *swrap_ipv6(void)
{
	static struct in6_addr base;
	static int ready;

	if (!ready) {
		ready = 1;

		if (inet_pton(AF_INET6, "FD00::5357:5F00", &base) <= 0) {
			abort();
		}
	}

	return &base;
}
1403 #endif
1404
1405 static void set_port(int family, int prt, struct swrap_address *addr)
1406 {
1407         switch (family) {
1408         case AF_INET:
1409                 addr->sa.in.sin_port = htons(prt);
1410                 break;
1411 #ifdef HAVE_IPV6
1412         case AF_INET6:
1413                 addr->sa.in6.sin6_port = htons(prt);
1414                 break;
1415 #endif
1416         }
1417 }
1418
/*
 * Return the size of the sockaddr structure that belongs to 'family':
 * sockaddr_in for AF_INET, sockaddr_in6 for AF_INET6 (only when
 * HAVE_IPV6 is defined), and 0 for every other family.
 */
static size_t socket_length(int family)
{
	size_t len = 0;

	if (family == AF_INET) {
		len = sizeof(struct sockaddr_in);
	}
#ifdef HAVE_IPV6
	else if (family == AF_INET6) {
		len = sizeof(struct sockaddr_in6);
	}
#endif

	return len;
}
1431
1432 static struct socket_info *swrap_get_socket_info(int si_index)
1433 {
1434         return (struct socket_info *)(&(sockets[si_index].info));
1435 }
1436
1437 static int swrap_get_refcount(struct socket_info *si)
1438 {
1439         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1440         return sic->meta.refcount;
1441 }
1442
1443 static void swrap_inc_refcount(struct socket_info *si)
1444 {
1445         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1446
1447         sic->meta.refcount += 1;
1448 }
1449
1450 static void swrap_dec_refcount(struct socket_info *si)
1451 {
1452         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1453
1454         sic->meta.refcount -= 1;
1455 }
1456
1457 static int swrap_get_next_free(struct socket_info *si)
1458 {
1459         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1460
1461         return sic->meta.next_free;
1462 }
1463
1464 static void swrap_set_next_free(struct socket_info *si, int next_free)
1465 {
1466         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1467
1468         sic->meta.next_free = next_free;
1469 }
1470
1471 static int swrap_un_path(struct sockaddr_un *un,
1472                          const char *swrap_dir,
1473                          char type,
1474                          unsigned int iface,
1475                          unsigned int prt)
1476 {
1477         int ret;
1478
1479         ret = snprintf(un->sun_path,
1480                        sizeof(un->sun_path),
1481                        "%s/"SOCKET_FORMAT,
1482                        swrap_dir,
1483                        type,
1484                        iface,
1485                        prt);
1486         if ((size_t)ret >= sizeof(un->sun_path)) {
1487                 return ENAMETOOLONG;
1488         }
1489
1490         return 0;
1491 }
1492
/*
 * Format "<swrap_dir>/EINVAL" into un->sun_path.
 *
 * Returns 0 when the result fits, ENAMETOOLONG otherwise. A negative
 * snprintf() result also ends up as ENAMETOOLONG because of the
 * unsigned comparison.
 */
static int swrap_un_path_EINVAL(struct sockaddr_un *un,
				const char *swrap_dir)
{
	const size_t cap = sizeof(un->sun_path);
	int written;

	written = snprintf(un->sun_path, cap, "%s/EINVAL", swrap_dir);

	return ((size_t)written < cap) ? 0 : ENAMETOOLONG;
}
1509
1510 static bool swrap_dir_usable(const char *swrap_dir)
1511 {
1512         struct sockaddr_un un;
1513         int ret;
1514
1515         ret = swrap_un_path(&un, swrap_dir, SOCKET_TYPE_CHAR_TCP, 0, 0);
1516         if (ret == 0) {
1517                 return true;
1518         }
1519
1520         ret = swrap_un_path_EINVAL(&un, swrap_dir);
1521         if (ret == 0) {
1522                 return true;
1523         }
1524
1525         return false;
1526 }
1527
1528 static char *socket_wrapper_dir(void)
1529 {
1530         char *swrap_dir = NULL;
1531         char *s = getenv("SOCKET_WRAPPER_DIR");
1532         char *t;
1533         bool ok;
1534
1535         if (s == NULL || s[0] == '\0') {
1536                 SWRAP_LOG(SWRAP_LOG_WARN, "SOCKET_WRAPPER_DIR not set");
1537                 return NULL;
1538         }
1539
1540         swrap_dir = realpath(s, NULL);
1541         if (swrap_dir == NULL) {
1542                 SWRAP_LOG(SWRAP_LOG_ERROR,
1543                           "Unable to resolve socket_wrapper dir path: %s - %s",
1544                           s,
1545                           strerror(errno));
1546                 abort();
1547         }
1548
1549         ok = swrap_dir_usable(swrap_dir);
1550         if (ok) {
1551                 goto done;
1552         }
1553
1554         free(swrap_dir);
1555
1556         ok = swrap_dir_usable(s);
1557         if (!ok) {
1558                 SWRAP_LOG(SWRAP_LOG_ERROR, "SOCKET_WRAPPER_DIR is too long");
1559                 abort();
1560         }
1561
1562         t = getenv("SOCKET_WRAPPER_DIR_ALLOW_ORIG");
1563         if (t == NULL) {
1564                 SWRAP_LOG(SWRAP_LOG_ERROR,
1565                           "realpath(SOCKET_WRAPPER_DIR) too long and "
1566                           "SOCKET_WRAPPER_DIR_ALLOW_ORIG not set");
1567                 abort();
1568
1569         }
1570
1571         swrap_dir = strdup(s);
1572         if (swrap_dir == NULL) {
1573                 SWRAP_LOG(SWRAP_LOG_ERROR,
1574                           "Unable to duplicate socket_wrapper dir path");
1575                 abort();
1576         }
1577
1578         SWRAP_LOG(SWRAP_LOG_WARN,
1579                   "realpath(SOCKET_WRAPPER_DIR) too long, "
1580                   "using original SOCKET_WRAPPER_DIR\n");
1581
1582 done:
1583         SWRAP_LOG(SWRAP_LOG_TRACE, "socket_wrapper_dir: %s", swrap_dir);
1584         return swrap_dir;
1585 }
1586
1587 static unsigned int socket_wrapper_mtu(void)
1588 {
1589         static unsigned int max_mtu = 0;
1590         unsigned int tmp;
1591         const char *s;
1592         char *endp;
1593
1594         swrap_mutex_lock(&mtu_update_mutex);
1595
1596         if (max_mtu != 0) {
1597                 goto done;
1598         }
1599
1600         max_mtu = SOCKET_WRAPPER_MTU_DEFAULT;
1601
1602         s = getenv("SOCKET_WRAPPER_MTU");
1603         if (s == NULL) {
1604                 goto done;
1605         }
1606
1607         tmp = strtol(s, &endp, 10);
1608         if (s == endp) {
1609                 goto done;
1610         }
1611
1612         if (tmp < SOCKET_WRAPPER_MTU_MIN || tmp > SOCKET_WRAPPER_MTU_MAX) {
1613                 goto done;
1614         }
1615         max_mtu = tmp;
1616
1617 done:
1618         swrap_mutex_unlock(&mtu_update_mutex);
1619         return max_mtu;
1620 }
1621
1622 static int _socket_wrapper_init_mutex(pthread_mutex_t *m, const char *name)
1623 {
1624         pthread_mutexattr_t ma;
1625         bool need_destroy = false;
1626         int ret = 0;
1627
1628 #define __CHECK(cmd) do { \
1629         ret = cmd; \
1630         if (ret != 0) { \
1631                 SWRAP_LOG(SWRAP_LOG_ERROR, \
1632                           "%s: %s - failed %d", \
1633                           name, #cmd, ret); \
1634                 goto done; \
1635         } \
1636 } while(0)
1637
1638         *m = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
1639         __CHECK(pthread_mutexattr_init(&ma));
1640         need_destroy = true;
1641         __CHECK(pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK));
1642         __CHECK(pthread_mutex_init(m, &ma));
1643 done:
1644         if (need_destroy) {
1645                 pthread_mutexattr_destroy(&ma);
1646         }
1647         return ret;
1648 }
1649
1650 static size_t socket_wrapper_max_sockets(void)
1651 {
1652         const char *s;
1653         size_t tmp;
1654         char *endp;
1655
1656         if (socket_info_max != 0) {
1657                 return socket_info_max;
1658         }
1659
1660         socket_info_max = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1661
1662         s = getenv("SOCKET_WRAPPER_MAX_SOCKETS");
1663         if (s == NULL || s[0] == '\0') {
1664                 goto done;
1665         }
1666
1667         tmp = strtoul(s, &endp, 10);
1668         if (s == endp) {
1669                 goto done;
1670         }
1671         if (tmp == 0) {
1672                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1673                 SWRAP_LOG(SWRAP_LOG_ERROR,
1674                           "Invalid number of sockets specified, "
1675                           "using default (%zu)",
1676                           tmp);
1677         }
1678
1679         if (tmp > SOCKET_WRAPPER_MAX_SOCKETS_LIMIT) {
1680                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
1681                 SWRAP_LOG(SWRAP_LOG_ERROR,
1682                           "Invalid number of sockets specified, "
1683                           "using maximum (%zu).",
1684                           tmp);
1685         }
1686
1687         socket_info_max = tmp;
1688
1689 done:
1690         return socket_info_max;
1691 }
1692
1693 static void socket_wrapper_init_fds_idx(void)
1694 {
1695         int *tmp = NULL;
1696         size_t i;
1697
1698         if (socket_fds_idx != NULL) {
1699                 return;
1700         }
1701
1702         tmp = (int *)calloc(socket_fds_max, sizeof(int));
1703         if (tmp == NULL) {
1704                 SWRAP_LOG(SWRAP_LOG_ERROR,
1705                           "Failed to allocate socket fds index array: %s",
1706                           strerror(errno));
1707                 exit(-1);
1708         }
1709
1710         for (i = 0; i < socket_fds_max; i++) {
1711                 tmp[i] = -1;
1712         }
1713
1714         socket_fds_idx = tmp;
1715 }
1716
1717 static void socket_wrapper_init_sockets(void)
1718 {
1719         size_t max_sockets;
1720         size_t i;
1721         int ret = 0;
1722
1723         swrap_bind_symbol_all();
1724
1725         swrap_mutex_lock(&sockets_mutex);
1726
1727         if (sockets != NULL) {
1728                 swrap_mutex_unlock(&sockets_mutex);
1729                 return;
1730         }
1731
1732         SWRAP_LOG(SWRAP_LOG_DEBUG,
1733                   "SOCKET_WRAPPER_PACKAGE[%s] SOCKET_WRAPPER_VERSION[%s]",
1734                   SOCKET_WRAPPER_PACKAGE, SOCKET_WRAPPER_VERSION);
1735
1736         /*
1737          * Intialize the static cache early before
1738          * any thread is able to start.
1739          */
1740         (void)swrap_ipv4_net();
1741
1742         socket_wrapper_init_fds_idx();
1743
1744         /* Needs to be called inside the sockets_mutex lock here. */
1745         max_sockets = socket_wrapper_max_sockets();
1746
1747         sockets = (struct socket_info_container *)calloc(max_sockets,
1748                                         sizeof(struct socket_info_container));
1749
1750         if (sockets == NULL) {
1751                 SWRAP_LOG(SWRAP_LOG_ERROR,
1752                           "Failed to allocate sockets array: %s",
1753                           strerror(errno));
1754                 swrap_mutex_unlock(&sockets_mutex);
1755                 exit(-1);
1756         }
1757
1758         swrap_mutex_lock(&first_free_mutex);
1759         swrap_mutex_lock(&sockets_si_global);
1760
1761         first_free = 0;
1762
1763         for (i = 0; i < max_sockets; i++) {
1764                 swrap_set_next_free(&sockets[i].info, i+1);
1765         }
1766
1767         /* mark the end of the free list */
1768         swrap_set_next_free(&sockets[max_sockets-1].info, -1);
1769
1770         swrap_mutex_unlock(&sockets_si_global);
1771         swrap_mutex_unlock(&first_free_mutex);
1772         swrap_mutex_unlock(&sockets_mutex);
1773         if (ret != 0) {
1774                 exit(-1);
1775         }
1776 }
1777
/*
 * Tell whether socket wrapping is active, i.e. whether
 * socket_wrapper_dir() yields a directory. When it does, the sockets
 * array is initialised as a side effect before returning true.
 */
bool socket_wrapper_enabled(void)
{
	char *dir = socket_wrapper_dir();
	bool enabled = (dir != NULL);

	if (enabled) {
		SAFE_FREE(dir);

		socket_wrapper_init_sockets();
	}

	return enabled;
}
1792
1793 static unsigned int socket_wrapper_default_iface(void)
1794 {
1795         const char *s = getenv("SOCKET_WRAPPER_DEFAULT_IFACE");
1796         if (s) {
1797                 unsigned int iface;
1798                 if (sscanf(s, "%u", &iface) == 1) {
1799                         if (iface >= 1 && iface <= MAX_WRAPPED_INTERFACES) {
1800                                 return iface;
1801                         }
1802                 }
1803         }
1804
1805         return 1;/* 127.0.0.1 */
1806 }
1807
1808 static void set_socket_info_index(int fd, int idx)
1809 {
1810         SWRAP_LOG(SWRAP_LOG_TRACE,
1811                   "fd=%d idx=%d",
1812                   fd, idx);
1813         socket_fds_idx[fd] = idx;
1814         /* This builtin issues a full memory barrier. */
1815         __sync_synchronize();
1816 }
1817
1818 static void reset_socket_info_index(int fd)
1819 {
1820         SWRAP_LOG(SWRAP_LOG_TRACE,
1821                   "fd=%d idx=%d",
1822                   fd, -1);
1823         set_socket_info_index(fd, -1);
1824 }
1825
1826 static int find_socket_info_index(int fd)
1827 {
1828         if (fd < 0) {
1829                 return -1;
1830         }
1831
1832         if (socket_fds_idx == NULL) {
1833                 return -1;
1834         }
1835
1836         if ((size_t)fd >= socket_fds_max) {
1837                 /*
1838                  * Do not add a log here as some applications do stupid things
1839                  * like:
1840                  *
1841                  *     for (fd = 0; fd <= getdtablesize(); fd++) {
1842                  *         close(fd)
1843                  *     };
1844                  *
1845                  * This would produce millions of lines of debug messages.
1846                  */
1847 #if 0
1848                 SWRAP_LOG(SWRAP_LOG_ERROR,
1849                           "Looking for a socket info for the fd %d is over the "
1850                           "max socket index limit of %zu.",
1851                           fd,
1852                           socket_fds_max);
1853 #endif
1854                 return -1;
1855         }
1856
1857         /* This builtin issues a full memory barrier. */
1858         __sync_synchronize();
1859         return socket_fds_idx[fd];
1860 }
1861
1862 static int swrap_add_socket_info(const struct socket_info *si_input)
1863 {
1864         struct socket_info *si = NULL;
1865         int si_index = -1;
1866
1867         if (si_input == NULL) {
1868                 errno = EINVAL;
1869                 return -1;
1870         }
1871
1872         swrap_mutex_lock(&first_free_mutex);
1873         if (first_free == -1) {
1874                 errno = ENFILE;
1875                 goto out;
1876         }
1877
1878         si_index = first_free;
1879         si = swrap_get_socket_info(si_index);
1880
1881         SWRAP_LOCK_SI(si);
1882
1883         first_free = swrap_get_next_free(si);
1884         *si = *si_input;
1885         swrap_inc_refcount(si);
1886
1887         SWRAP_UNLOCK_SI(si);
1888
1889 out:
1890         swrap_mutex_unlock(&first_free_mutex);
1891
1892         return si_index;
1893 }
1894
1895 static int swrap_create_socket(struct socket_info *si, int fd)
1896 {
1897         int idx;
1898
1899         if ((size_t)fd >= socket_fds_max) {
1900                 SWRAP_LOG(SWRAP_LOG_ERROR,
1901                           "The max socket index limit of %zu has been reached, "
1902                           "trying to add %d",
1903                           socket_fds_max,
1904                           fd);
1905                 errno = EMFILE;
1906                 return -1;
1907         }
1908
1909         idx = swrap_add_socket_info(si);
1910         if (idx == -1) {
1911                 return -1;
1912         }
1913
1914         set_socket_info_index(fd, idx);
1915
1916         return idx;
1917 }
1918
1919 static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, socklen_t *len)
1920 {
1921         unsigned int iface;
1922         unsigned int prt;
1923         const char *p;
1924         char type;
1925
1926         p = strrchr(un->sun_path, '/');
1927         if (p) p++; else p = un->sun_path;
1928
1929         if (sscanf(p, SOCKET_FORMAT, &type, &iface, &prt) != 3) {
1930                 SWRAP_LOG(SWRAP_LOG_ERROR, "sun_path[%s] p[%s]",
1931                           un->sun_path, p);
1932                 errno = EINVAL;
1933                 return -1;
1934         }
1935
1936         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1937                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1938                           type, iface, prt);
1939                 errno = EINVAL;
1940                 return -1;
1941         }
1942
1943         if (prt > 0xFFFF) {
1944                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
1945                           type, iface, prt);
1946                 errno = EINVAL;
1947                 return -1;
1948         }
1949
1950         SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u",
1951                   type, iface, prt);
1952
1953         switch(type) {
1954         case SOCKET_TYPE_CHAR_TCP:
1955         case SOCKET_TYPE_CHAR_UDP: {
1956                 struct sockaddr_in *in2 = (struct sockaddr_in *)(void *)in;
1957
1958                 if ((*len) < sizeof(*in2)) {
1959                         SWRAP_LOG(SWRAP_LOG_ERROR,
1960                                   "V4: *len(%zu) < sizeof(*in2)=%zu",
1961                                   (size_t)*len, sizeof(*in2));
1962                         errno = EINVAL;
1963                         return -1;
1964                 }
1965
1966                 memset(in2, 0, sizeof(*in2));
1967                 in2->sin_family = AF_INET;
1968                 in2->sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
1969                 in2->sin_port = htons(prt);
1970
1971                 *len = sizeof(*in2);
1972                 break;
1973         }
1974 #ifdef HAVE_IPV6
1975         case SOCKET_TYPE_CHAR_TCP_V6:
1976         case SOCKET_TYPE_CHAR_UDP_V6: {
1977                 struct sockaddr_in6 *in2 = (struct sockaddr_in6 *)(void *)in;
1978
1979                 if ((*len) < sizeof(*in2)) {
1980                         SWRAP_LOG(SWRAP_LOG_ERROR,
1981                                   "V6: *len(%zu) < sizeof(*in2)=%zu",
1982                                   (size_t)*len, sizeof(*in2));
1983                         SWRAP_LOG(SWRAP_LOG_ERROR, "LINE:%d", __LINE__);
1984                         errno = EINVAL;
1985                         return -1;
1986                 }
1987
1988                 memset(in2, 0, sizeof(*in2));
1989                 in2->sin6_family = AF_INET6;
1990                 in2->sin6_addr = *swrap_ipv6();
1991                 in2->sin6_addr.s6_addr[15] = iface;
1992                 in2->sin6_port = htons(prt);
1993
1994                 *len = sizeof(*in2);
1995                 break;
1996         }
1997 #endif
1998         default:
1999                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
2000                           type, iface, prt);
2001                 errno = EINVAL;
2002                 return -1;
2003         }
2004
2005         return 0;
2006 }
2007
2008 static int convert_in_un_remote(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
2009                                 int *bcast)
2010 {
2011         char type = '\0';
2012         unsigned int prt;
2013         unsigned int iface;
2014         int is_bcast = 0;
2015         char *swrap_dir = NULL;
2016
2017         if (bcast) *bcast = 0;
2018
2019         switch (inaddr->sa_family) {
2020         case AF_INET: {
2021                 const struct sockaddr_in *in =
2022                     (const struct sockaddr_in *)(const void *)inaddr;
2023                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2024                 char u_type = '\0';
2025                 char b_type = '\0';
2026                 char a_type = '\0';
2027                 const unsigned int sw_net_addr = swrap_ipv4_net();
2028                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2029
2030                 switch (si->type) {
2031                 case SOCK_STREAM:
2032                         u_type = SOCKET_TYPE_CHAR_TCP;
2033                         break;
2034                 case SOCK_DGRAM:
2035                         u_type = SOCKET_TYPE_CHAR_UDP;
2036                         a_type = SOCKET_TYPE_CHAR_UDP;
2037                         b_type = SOCKET_TYPE_CHAR_UDP;
2038                         break;
2039                 default:
2040                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2041                         errno = ESOCKTNOSUPPORT;
2042                         return -1;
2043                 }
2044
2045                 prt = ntohs(in->sin_port);
2046                 if (a_type && addr == 0xFFFFFFFF) {
2047                         /* 255.255.255.255 only udp */
2048                         is_bcast = 2;
2049                         type = a_type;
2050                         iface = socket_wrapper_default_iface();
2051                 } else if (b_type && addr == sw_bcast_addr) {
2052                         /*
2053                          * 127.255.255.255
2054                          * or
2055                          * 10.255.255.255
2056                          * only udp
2057                          */
2058                         is_bcast = 1;
2059                         type = b_type;
2060                         iface = socket_wrapper_default_iface();
2061                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2062                         /* 127.0.0.X or 10.53.57.X */
2063                         is_bcast = 0;
2064                         type = u_type;
2065                         iface = (addr & 0x000000FF);
2066                 } else {
2067                         char str[256] = {0,};
2068                         inet_ntop(inaddr->sa_family,
2069                                   &in->sin_addr,
2070                                   str, sizeof(str));
2071                         SWRAP_LOG(SWRAP_LOG_WARN,
2072                                   "str[%s] prt[%u]",
2073                                   str, (unsigned)prt);
2074                         errno = ENETUNREACH;
2075                         return -1;
2076                 }
2077                 if (bcast) *bcast = is_bcast;
2078                 break;
2079         }
2080 #ifdef HAVE_IPV6
2081         case AF_INET6: {
2082                 const struct sockaddr_in6 *in =
2083                     (const struct sockaddr_in6 *)(const void *)inaddr;
2084                 struct in6_addr cmp1, cmp2;
2085
2086                 switch (si->type) {
2087                 case SOCK_STREAM:
2088                         type = SOCKET_TYPE_CHAR_TCP_V6;
2089                         break;
2090                 case SOCK_DGRAM:
2091                         type = SOCKET_TYPE_CHAR_UDP_V6;
2092                         break;
2093                 default:
2094                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2095                         errno = ESOCKTNOSUPPORT;
2096                         return -1;
2097                 }
2098
2099                 /* XXX no multicast/broadcast */
2100
2101                 prt = ntohs(in->sin6_port);
2102
2103                 cmp1 = *swrap_ipv6();
2104                 cmp2 = in->sin6_addr;
2105                 cmp2.s6_addr[15] = 0;
2106                 if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2107                         iface = in->sin6_addr.s6_addr[15];
2108                 } else {
2109                         char str[256] = {0,};
2110                         inet_ntop(inaddr->sa_family,
2111                                   &in->sin6_addr,
2112                                   str, sizeof(str));
2113                         SWRAP_LOG(SWRAP_LOG_WARN,
2114                                   "str[%s] prt[%u]",
2115                                   str, (unsigned)prt);
2116                         errno = ENETUNREACH;
2117                         return -1;
2118                 }
2119
2120                 break;
2121         }
2122 #endif
2123         default:
2124                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family!");
2125                 errno = ENETUNREACH;
2126                 return -1;
2127         }
2128
2129         if (prt == 0) {
2130                 SWRAP_LOG(SWRAP_LOG_WARN, "Port not set");
2131                 errno = EINVAL;
2132                 return -1;
2133         }
2134
2135         swrap_dir = socket_wrapper_dir();
2136         if (swrap_dir == NULL) {
2137                 errno = EINVAL;
2138                 return -1;
2139         }
2140
2141         if (is_bcast) {
2142                 swrap_un_path_EINVAL(un, swrap_dir);
2143                 SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2144                 SAFE_FREE(swrap_dir);
2145                 /* the caller need to do more processing */
2146                 return 0;
2147         }
2148
2149         swrap_un_path(un, swrap_dir, type, iface, prt);
2150         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2151
2152         SAFE_FREE(swrap_dir);
2153
2154         return 0;
2155 }
2156
2157 static int convert_in_un_alloc(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
2158                                int *bcast)
2159 {
2160         char type = '\0';
2161         unsigned int prt;
2162         unsigned int iface;
2163         struct stat st;
2164         int is_bcast = 0;
2165         char *swrap_dir = NULL;
2166
2167         if (bcast) *bcast = 0;
2168
2169         switch (si->family) {
2170         case AF_INET: {
2171                 const struct sockaddr_in *in =
2172                     (const struct sockaddr_in *)(const void *)inaddr;
2173                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2174                 char u_type = '\0';
2175                 char d_type = '\0';
2176                 char b_type = '\0';
2177                 char a_type = '\0';
2178                 const unsigned int sw_net_addr = swrap_ipv4_net();
2179                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2180
2181                 prt = ntohs(in->sin_port);
2182
2183                 switch (si->type) {
2184                 case SOCK_STREAM:
2185                         u_type = SOCKET_TYPE_CHAR_TCP;
2186                         d_type = SOCKET_TYPE_CHAR_TCP;
2187                         break;
2188                 case SOCK_DGRAM:
2189                         u_type = SOCKET_TYPE_CHAR_UDP;
2190                         d_type = SOCKET_TYPE_CHAR_UDP;
2191                         a_type = SOCKET_TYPE_CHAR_UDP;
2192                         b_type = SOCKET_TYPE_CHAR_UDP;
2193                         break;
2194                 default:
2195                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2196                         errno = ESOCKTNOSUPPORT;
2197                         return -1;
2198                 }
2199
2200                 if (addr == 0) {
2201                         /* 0.0.0.0 */
2202                         is_bcast = 0;
2203                         type = d_type;
2204                         iface = socket_wrapper_default_iface();
2205                 } else if (a_type && addr == 0xFFFFFFFF) {
2206                         /* 255.255.255.255 only udp */
2207                         is_bcast = 2;
2208                         type = a_type;
2209                         iface = socket_wrapper_default_iface();
2210                 } else if (b_type && addr == sw_bcast_addr) {
2211                         /* 127.255.255.255 only udp */
2212                         is_bcast = 1;
2213                         type = b_type;
2214                         iface = socket_wrapper_default_iface();
2215                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2216                         /* 127.0.0.X */
2217                         is_bcast = 0;
2218                         type = u_type;
2219                         iface = (addr & 0x000000FF);
2220                 } else {
2221                         errno = EADDRNOTAVAIL;
2222                         return -1;
2223                 }
2224
2225                 /* Store the bind address for connect() */
2226                 if (si->bindname.sa_socklen == 0) {
2227                         struct sockaddr_in bind_in;
2228                         socklen_t blen = sizeof(struct sockaddr_in);
2229
2230                         ZERO_STRUCT(bind_in);
2231                         bind_in.sin_family = in->sin_family;
2232                         bind_in.sin_port = in->sin_port;
2233                         bind_in.sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
2234                         si->bindname.sa_socklen = blen;
2235                         memcpy(&si->bindname.sa.in, &bind_in, blen);
2236                 }
2237
2238                 break;
2239         }
2240 #ifdef HAVE_IPV6
2241         case AF_INET6: {
2242                 const struct sockaddr_in6 *in =
2243                     (const struct sockaddr_in6 *)(const void *)inaddr;
2244                 struct in6_addr cmp1, cmp2;
2245
2246                 switch (si->type) {
2247                 case SOCK_STREAM:
2248                         type = SOCKET_TYPE_CHAR_TCP_V6;
2249                         break;
2250                 case SOCK_DGRAM:
2251                         type = SOCKET_TYPE_CHAR_UDP_V6;
2252                         break;
2253                 default:
2254                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2255                         errno = ESOCKTNOSUPPORT;
2256                         return -1;
2257                 }
2258
2259                 /* XXX no multicast/broadcast */
2260
2261                 prt = ntohs(in->sin6_port);
2262
2263                 cmp1 = *swrap_ipv6();
2264                 cmp2 = in->sin6_addr;
2265                 cmp2.s6_addr[15] = 0;
2266                 if (IN6_IS_ADDR_UNSPECIFIED(&in->sin6_addr)) {
2267                         iface = socket_wrapper_default_iface();
2268                 } else if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2269                         iface = in->sin6_addr.s6_addr[15];
2270                 } else {
2271                         errno = EADDRNOTAVAIL;
2272                         return -1;
2273                 }
2274
2275                 /* Store the bind address for connect() */
2276                 if (si->bindname.sa_socklen == 0) {
2277                         struct sockaddr_in6 bind_in;
2278                         socklen_t blen = sizeof(struct sockaddr_in6);
2279
2280                         ZERO_STRUCT(bind_in);
2281                         bind_in.sin6_family = in->sin6_family;
2282                         bind_in.sin6_port = in->sin6_port;
2283
2284                         bind_in.sin6_addr = *swrap_ipv6();
2285                         bind_in.sin6_addr.s6_addr[15] = iface;
2286
2287                         memcpy(&si->bindname.sa.in6, &bind_in, blen);
2288                         si->bindname.sa_socklen = blen;
2289                 }
2290
2291                 break;
2292         }
2293 #endif
2294         default:
2295                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2296                 errno = EADDRNOTAVAIL;
2297                 return -1;
2298         }
2299
2300
2301         if (bcast) *bcast = is_bcast;
2302
2303         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
2304                 errno = EINVAL;
2305                 return -1;
2306         }
2307
2308         swrap_dir = socket_wrapper_dir();
2309         if (swrap_dir == NULL) {
2310                 errno = EINVAL;
2311                 return -1;
2312         }
2313
2314         if (prt == 0) {
2315                 /* handle auto-allocation of ephemeral ports */
2316                 for (prt = 5001; prt < 10000; prt++) {
2317                         swrap_un_path(un, swrap_dir, type, iface, prt);
2318                         if (stat(un->sun_path, &st) == 0) continue;
2319
2320                         set_port(si->family, prt, &si->myname);
2321                         set_port(si->family, prt, &si->bindname);
2322
2323                         break;
2324                 }
2325
2326                 if (prt == 10000) {
2327                         errno = ENFILE;
2328                         SAFE_FREE(swrap_dir);
2329                         return -1;
2330                 }
2331         }
2332
2333         swrap_un_path(un, swrap_dir, type, iface, prt);
2334         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2335
2336         SAFE_FREE(swrap_dir);
2337
2338         return 0;
2339 }
2340
/*
 * Look up the socket_info that belongs to a file descriptor.
 *
 * Returns NULL if find_socket_info_index() reports -1 for fd, otherwise
 * whatever swrap_get_socket_info() returns for the index found.
 */
static struct socket_info *find_socket_info(int fd)
{
	const int si_index = find_socket_info_index(fd);

	return (si_index == -1) ? NULL : swrap_get_socket_info(si_index);
}
2351
/*
 * Disabled code: the whole function is compiled out by '#if 0'.
 *
 * It walks the socket_fds list and reports whether any known socket has
 * a local name (myname) with the same family, port and address as 'sa'.
 *
 * NOTE(review): this looks stale. It treats socket_info->myname as a
 * pointer and iterates a 'socket_fds' list, while the live code in this
 * file passes '&si->myname' around and resolves sockets through
 * find_socket_info_index(). It most likely needs porting before it can
 * be re-enabled - confirm against the current struct socket_info.
 */
#if 0 /* FIXME */
static bool check_addr_port_in_use(const struct sockaddr *sa, socklen_t len)
{
	struct socket_info_fd *f;
	const struct socket_info *last_s = NULL;

	/* first catch invalid input */
	switch (sa->sa_family) {
	case AF_INET:
		if (len < sizeof(struct sockaddr_in)) {
			return false;
		}
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		if (len < sizeof(struct sockaddr_in6)) {
			return false;
		}
		break;
#endif
	default:
		return false;
		break;
	}

	for (f = socket_fds; f; f = f->next) {
		struct socket_info *s = swrap_get_socket_info(f->si_index);

		/* skip an entry that resolves to the same socket as the previous one */
		if (s == last_s) {
			continue;
		}
		last_s = s;

		if (s->myname == NULL) {
			continue;
		}
		if (s->myname->sa_family != sa->sa_family) {
			continue;
		}
		switch (s->myname->sa_family) {
		case AF_INET: {
			struct sockaddr_in *sin1, *sin2;

			sin1 = (struct sockaddr_in *)s->myname;
			sin2 = (struct sockaddr_in *)sa;

			/* sockets whose local address is INADDR_ANY are never reported */
			if (sin1->sin_addr.s_addr == htonl(INADDR_ANY)) {
				continue;
			}
			if (sin1->sin_port != sin2->sin_port) {
				continue;
			}
			if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr) {
				continue;
			}

			/* found */
			return true;
			break;
		}
#ifdef HAVE_IPV6
		case AF_INET6: {
			struct sockaddr_in6 *sin1, *sin2;

			sin1 = (struct sockaddr_in6 *)s->myname;
			sin2 = (struct sockaddr_in6 *)sa;

			if (sin1->sin6_port != sin2->sin6_port) {
				continue;
			}
			if (!IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
						&sin2->sin6_addr))
			{
				continue;
			}

			/* found */
			return true;
			break;
		}
#endif
		default:
			continue;
			break;

		}
	}

	return false;
}
#endif
2443
/* Forward declaration, the function is defined elsewhere in this file. */
static void swrap_remove_stale(int fd);
2445
2446 static int sockaddr_convert_to_un(struct socket_info *si,
2447                                   const struct sockaddr *in_addr,
2448                                   socklen_t in_len,
2449                                   struct sockaddr_un *out_addr,
2450                                   int alloc_sock,
2451                                   int *bcast)
2452 {
2453         struct sockaddr *out = (struct sockaddr *)(void *)out_addr;
2454
2455         (void) in_len; /* unused */
2456
2457         if (out_addr == NULL) {
2458                 return 0;
2459         }
2460
2461         out->sa_family = AF_UNIX;
2462 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2463         out->sa_len = sizeof(*out_addr);
2464 #endif
2465
2466         switch (in_addr->sa_family) {
2467         case AF_UNSPEC: {
2468                 const struct sockaddr_in *sin;
2469                 if (si->family != AF_INET) {
2470                         break;
2471                 }
2472                 if (in_len < sizeof(struct sockaddr_in)) {
2473                         break;
2474                 }
2475                 sin = (const struct sockaddr_in *)(const void *)in_addr;
2476                 if(sin->sin_addr.s_addr != htonl(INADDR_ANY)) {
2477                         break;
2478                 }
2479
2480                 /*
2481                  * Note: in the special case of AF_UNSPEC and INADDR_ANY,
2482                  * AF_UNSPEC is mapped to AF_INET and must be treated here.
2483                  */
2484
2485                 FALL_THROUGH;
2486         }
2487         case AF_INET:
2488 #ifdef HAVE_IPV6
2489         case AF_INET6:
2490 #endif
2491                 switch (si->type) {
2492                 case SOCK_STREAM:
2493                 case SOCK_DGRAM:
2494                         break;
2495                 default:
2496                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2497                         errno = ESOCKTNOSUPPORT;
2498                         return -1;
2499                 }
2500                 if (alloc_sock) {
2501                         return convert_in_un_alloc(si, in_addr, out_addr, bcast);
2502                 } else {
2503                         return convert_in_un_remote(si, in_addr, out_addr, bcast);
2504                 }
2505         default:
2506                 break;
2507         }
2508
2509         errno = EAFNOSUPPORT;
2510         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2511         return -1;
2512 }
2513
2514 static int sockaddr_convert_from_un(const struct socket_info *si,
2515                                     const struct sockaddr_un *in_addr,
2516                                     socklen_t un_addrlen,
2517                                     int family,
2518                                     struct sockaddr *out_addr,
2519                                     socklen_t *out_addrlen)
2520 {
2521         int ret;
2522
2523         if (out_addr == NULL || out_addrlen == NULL)
2524                 return 0;
2525
2526         if (un_addrlen == 0) {
2527                 *out_addrlen = 0;
2528                 return 0;
2529         }
2530
2531         switch (family) {
2532         case AF_INET:
2533 #ifdef HAVE_IPV6
2534         case AF_INET6:
2535 #endif
2536                 switch (si->type) {
2537                 case SOCK_STREAM:
2538                 case SOCK_DGRAM:
2539                         break;
2540                 default:
2541                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2542                         errno = ESOCKTNOSUPPORT;
2543                         return -1;
2544                 }
2545                 ret = convert_un_in(in_addr, out_addr, out_addrlen);
2546 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2547                 out_addr->sa_len = *out_addrlen;
2548 #endif
2549                 return ret;
2550         default:
2551                 break;
2552         }
2553
2554         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2555         errno = EAFNOSUPPORT;
2556         return -1;
2557 }
2558
/*
 * Kinds of socket events.
 *
 * NOTE(review): presumably consumed by the pcap capture code further
 * down in this file - confirm against the users of this enum.
 *
 * The numeric values are spelled out; they are the same ones the
 * compiler assigns implicitly, starting at 0.
 */
enum swrap_packet_type {
	SWRAP_CONNECT_SEND    = 0,
	SWRAP_CONNECT_UNREACH = 1,
	SWRAP_CONNECT_RECV    = 2,
	SWRAP_CONNECT_ACK     = 3,
	SWRAP_ACCEPT_SEND     = 4,
	SWRAP_ACCEPT_RECV     = 5,
	SWRAP_ACCEPT_ACK      = 6,
	SWRAP_RECVFROM        = 7,
	SWRAP_SENDTO          = 8,
	SWRAP_SENDTO_UNREACH  = 9,
	SWRAP_PENDING_RST     = 10,
	SWRAP_RECV            = 11,
	SWRAP_RECV_RST        = 12,
	SWRAP_SEND            = 13,
	SWRAP_SEND_RST        = 14,
	SWRAP_CLOSE_SEND      = 15,
	SWRAP_CLOSE_RECV      = 16,
	SWRAP_CLOSE_ACK       = 17,
};
2579
/*
 * Header at the start of the capture file.
 *
 * The struct is expected to be exactly SWRAP_FILE_HDR_SIZE bytes,
 * swrap_pcap_init_file() disables pcap support if the compiler added
 * padding.
 *
 * NOTE(review): the field set mirrors the classic libpcap global file
 * header - confirm against the code that fills it in.
 */
#define SWRAP_FILE_HDR_SIZE 24

/* NOTE(review): presumably the value stored in frame_max_len - confirm */
#define SWRAP_FRAME_LENGTH_MAX 0xFFFF

struct swrap_file_hdr {
	uint32_t magic;
	uint16_t version_major;
	uint16_t version_minor;
	int32_t  timezone;
	uint32_t sigfigs;
	uint32_t frame_max_len;
	uint32_t link_type;
};
2591
/*
 * Per packet record header: a timestamp plus the recorded and the full
 * length of the packet.
 *
 * The struct is expected to be exactly SWRAP_PACKET_FRAME_SIZE bytes,
 * swrap_pcap_init_file() disables pcap support otherwise.
 */
#define SWRAP_PACKET_FRAME_SIZE 16

struct swrap_packet_frame {
	uint32_t seconds;
	uint32_t micro_seconds;
	uint32_t recorded_length;
	uint32_t full_length;
};
2599
/*
 * IP header of a captured packet, either IPv4 (v4) or IPv6 (v6).
 *
 * The expected sizes of both variants and of the union as a whole are
 * given by the macros below; swrap_pcap_init_file() disables pcap
 * support if the compiler laid the structs out differently.
 */
#define SWRAP_PACKET_IP_V4_SIZE 20
#define SWRAP_PACKET_IP_V6_SIZE 40
#define SWRAP_PACKET_IP_SIZE 40

union swrap_packet_ip {
	struct {
		uint8_t  ver_hdrlen;
		uint8_t  tos;
		uint16_t packet_length;
		uint16_t identification;
		uint8_t  flags;
		uint8_t  fragment;
		uint8_t  ttl;
		uint8_t  protocol;
		uint16_t hdr_checksum;
		uint32_t src_addr;
		uint32_t dest_addr;
	} v4;
	struct {
		uint8_t  ver_prio;
		uint8_t  flow_label_high;
		uint16_t flow_label_low;
		uint16_t payload_length;
		uint8_t  next_header;
		uint8_t  hop_limit;
		uint8_t  src_addr[16];
		uint8_t  dest_addr[16];
	} v6;
};
2628
/*
 * Transport level header that follows the IP header of a captured
 * packet: TCP, UDP, ICMPv4 or ICMPv6.
 *
 * The expected size of every variant and of the union as a whole is
 * given by the macros below; swrap_pcap_init_file() disables pcap
 * support if the compiler laid the structs out differently.
 */
#define SWRAP_PACKET_PAYLOAD_TCP_SIZE 20
#define SWRAP_PACKET_PAYLOAD_UDP_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP4_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP6_SIZE 8
#define SWRAP_PACKET_PAYLOAD_SIZE 20

union swrap_packet_payload {
	struct {
		uint16_t source_port;
		uint16_t dest_port;
		uint32_t seq_num;
		uint32_t ack_num;
		uint8_t  hdr_length;
		uint8_t  control;
		uint16_t window;
		uint16_t checksum;
		uint16_t urg;
	} tcp;
	struct {
		uint16_t source_port;
		uint16_t dest_port;
		uint16_t length;
		uint16_t checksum;
	} udp;
	struct {
		uint8_t  type;
		uint8_t  code;
		uint16_t checksum;
		uint32_t unused;
	} icmp4;
	struct {
		uint8_t  type;
		uint8_t  code;
		uint16_t checksum;
		uint32_t unused;
	} icmp6;
};
2665
/*
 * Lower bound for the size of a packet buffer: room for the frame
 * header, the largest IP header and the largest payload header.
 */
#define SWRAP_PACKET_MIN_ALLOC ( \
	SWRAP_PACKET_FRAME_SIZE + \
	SWRAP_PACKET_IP_SIZE + \
	SWRAP_PACKET_PAYLOAD_SIZE \
)
2670
2671 static const char *swrap_pcap_init_file(void)
2672 {
2673         static int initialized = 0;
2674         static const char *s = NULL;
2675         static const struct swrap_file_hdr h;
2676         static const struct swrap_packet_frame f;
2677         static const union swrap_packet_ip i;
2678         static const union swrap_packet_payload p;
2679
2680         if (initialized == 1) {
2681                 return s;
2682         }
2683         initialized = 1;
2684
2685         /*
2686          * TODO: don't use the structs use plain buffer offsets
2687          *       and PUSH_U8(), PUSH_U16() and PUSH_U32()
2688          *
2689          * for now make sure we disable PCAP support
2690          * if the struct has alignment!
2691          */
2692         if (sizeof(h) != SWRAP_FILE_HDR_SIZE) {
2693                 return NULL;
2694         }
2695         if (sizeof(f) != SWRAP_PACKET_FRAME_SIZE) {
2696                 return NULL;
2697         }
2698         if (sizeof(i) != SWRAP_PACKET_IP_SIZE) {
2699                 return NULL;
2700         }
2701         if (sizeof(i.v4) != SWRAP_PACKET_IP_V4_SIZE) {
2702                 return NULL;
2703         }
2704         if (sizeof(i.v6) != SWRAP_PACKET_IP_V6_SIZE) {
2705                 return NULL;
2706         }
2707         if (sizeof(p) != SWRAP_PACKET_PAYLOAD_SIZE) {
2708                 return NULL;
2709         }
2710         if (sizeof(p.tcp) != SWRAP_PACKET_PAYLOAD_TCP_SIZE) {
2711                 return NULL;
2712         }
2713         if (sizeof(p.udp) != SWRAP_PACKET_PAYLOAD_UDP_SIZE) {
2714                 return NULL;
2715         }
2716         if (sizeof(p.icmp4) != SWRAP_PACKET_PAYLOAD_ICMP4_SIZE) {
2717                 return NULL;
2718         }
2719         if (sizeof(p.icmp6) != SWRAP_PACKET_PAYLOAD_ICMP6_SIZE) {
2720                 return NULL;
2721         }
2722
2723         s = getenv("SOCKET_WRAPPER_PCAP_FILE");
2724         if (s == NULL) {
2725                 return NULL;
2726         }
2727         if (strncmp(s, "./", 2) == 0) {
2728                 s += 2;
2729         }
2730         SWRAP_LOG(SWRAP_LOG_TRACE, "SOCKET_WRAPPER_PCAP_FILE: %s", s);
2731         return s;
2732 }
2733
2734 static uint8_t *swrap_pcap_packet_init(struct timeval *tval,
2735                                        const struct sockaddr *src,
2736                                        const struct sockaddr *dest,
2737                                        int socket_type,
2738                                        const uint8_t *payload,
2739                                        size_t payload_len,
2740                                        unsigned long tcp_seqno,
2741                                        unsigned long tcp_ack,
2742                                        unsigned char tcp_ctl,
2743                                        int unreachable,
2744                                        size_t *_packet_len)
2745 {
2746         uint8_t *base = NULL;
2747         uint8_t *buf = NULL;
2748         union {
2749                 uint8_t *ptr;
2750                 struct swrap_packet_frame *frame;
2751         } f;
2752         union {
2753                 uint8_t *ptr;
2754                 union swrap_packet_ip *ip;
2755         } i;
2756         union swrap_packet_payload *pay;
2757         size_t packet_len;
2758         size_t alloc_len;
2759         size_t nonwire_len = sizeof(struct swrap_packet_frame);
2760         size_t wire_hdr_len = 0;
2761         size_t wire_len = 0;
2762         size_t ip_hdr_len = 0;
2763         size_t icmp_hdr_len = 0;
2764         size_t icmp_truncate_len = 0;
2765         uint8_t protocol = 0, icmp_protocol = 0;
2766         const struct sockaddr_in *src_in = NULL;
2767         const struct sockaddr_in *dest_in = NULL;
2768 #ifdef HAVE_IPV6
2769         const struct sockaddr_in6 *src_in6 = NULL;
2770         const struct sockaddr_in6 *dest_in6 = NULL;
2771 #endif
2772         uint16_t src_port;
2773         uint16_t dest_port;
2774
2775         switch (src->sa_family) {
2776         case AF_INET:
2777                 src_in = (const struct sockaddr_in *)(const void *)src;
2778                 dest_in = (const struct sockaddr_in *)(const void *)dest;
2779                 src_port = src_in->sin_port;
2780                 dest_port = dest_in->sin_port;
2781                 ip_hdr_len = sizeof(i.ip->v4);
2782                 break;
2783 #ifdef HAVE_IPV6
2784         case AF_INET6:
2785                 src_in6 = (const struct sockaddr_in6 *)(const void *)src;
2786                 dest_in6 = (const struct sockaddr_in6 *)(const void *)dest;
2787                 src_port = src_in6->sin6_port;
2788                 dest_port = dest_in6->sin6_port;
2789                 ip_hdr_len = sizeof(i.ip->v6);
2790                 break;
2791 #endif
2792         default:
2793                 return NULL;
2794         }
2795
2796         switch (socket_type) {
2797         case SOCK_STREAM:
2798                 protocol = 0x06; /* TCP */
2799                 wire_hdr_len = ip_hdr_len + sizeof(pay->tcp);
2800                 wire_len = wire_hdr_len + payload_len;
2801                 break;
2802
2803         case SOCK_DGRAM:
2804                 protocol = 0x11; /* UDP */
2805                 wire_hdr_len = ip_hdr_len + sizeof(pay->udp);
2806                 wire_len = wire_hdr_len + payload_len;
2807                 break;
2808
2809         default:
2810                 return NULL;
2811         }
2812
2813         if (unreachable) {
2814                 icmp_protocol = protocol;
2815                 switch (src->sa_family) {
2816                 case AF_INET:
2817                         protocol = 0x01; /* ICMPv4 */
2818                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp4);
2819                         break;
2820 #ifdef HAVE_IPV6
2821                 case AF_INET6:
2822                         protocol = 0x3A; /* ICMPv6 */
2823                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp6);
2824                         break;
2825 #endif
2826                 }
2827                 if (wire_len > 64 ) {
2828                         icmp_truncate_len = wire_len - 64;
2829                 }
2830                 wire_len += icmp_hdr_len;
2831         }
2832
2833         packet_len = nonwire_len + wire_len;
2834         alloc_len = packet_len;
2835         if (alloc_len < SWRAP_PACKET_MIN_ALLOC) {
2836                 alloc_len = SWRAP_PACKET_MIN_ALLOC;
2837         }
2838
2839         base = (uint8_t *)calloc(1, alloc_len);
2840         if (base == NULL) {
2841                 return NULL;
2842         }
2843
2844         buf = base;
2845         f.ptr = buf;
2846
2847         f.frame->seconds                = tval->tv_sec;
2848         f.frame->micro_seconds  = tval->tv_usec;
2849         f.frame->recorded_length        = wire_len - icmp_truncate_len;
2850         f.frame->full_length    = wire_len - icmp_truncate_len;
2851
2852         buf += SWRAP_PACKET_FRAME_SIZE;
2853
2854         i.ptr = buf;
2855         switch (src->sa_family) {
2856         case AF_INET:
2857                 if (src_in == NULL || dest_in == NULL) {
2858                         SAFE_FREE(base);
2859                         return NULL;
2860                 }
2861
2862                 i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2863                 i.ip->v4.tos            = 0x00;
2864                 i.ip->v4.packet_length  = htons(wire_len - icmp_truncate_len);
2865                 i.ip->v4.identification = htons(0xFFFF);
2866                 i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2867                 i.ip->v4.fragment       = htons(0x0000);
2868                 i.ip->v4.ttl            = 0xFF;
2869                 i.ip->v4.protocol       = protocol;
2870                 i.ip->v4.hdr_checksum   = htons(0x0000);
2871                 i.ip->v4.src_addr       = src_in->sin_addr.s_addr;
2872                 i.ip->v4.dest_addr      = dest_in->sin_addr.s_addr;
2873                 buf += SWRAP_PACKET_IP_V4_SIZE;
2874                 break;
2875 #ifdef HAVE_IPV6
2876         case AF_INET6:
2877                 if (src_in6 == NULL || dest_in6 == NULL) {
2878                         SAFE_FREE(base);
2879                         return NULL;
2880                 }
2881
2882                 i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2883                 i.ip->v6.flow_label_high        = 0x00;
2884                 i.ip->v6.flow_label_low = 0x0000;
2885                 i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2886                 i.ip->v6.next_header    = protocol;
2887                 memcpy(i.ip->v6.src_addr, src_in6->sin6_addr.s6_addr, 16);
2888                 memcpy(i.ip->v6.dest_addr, dest_in6->sin6_addr.s6_addr, 16);
2889                 buf += SWRAP_PACKET_IP_V6_SIZE;
2890                 break;
2891 #endif
2892         }
2893
2894         if (unreachable) {
2895                 pay = (union swrap_packet_payload *)(void *)buf;
2896                 switch (src->sa_family) {
2897                 case AF_INET:
2898                         pay->icmp4.type         = 0x03; /* destination unreachable */
2899                         pay->icmp4.code         = 0x01; /* host unreachable */
2900                         pay->icmp4.checksum     = htons(0x0000);
2901                         pay->icmp4.unused       = htonl(0x00000000);
2902
2903                         buf += SWRAP_PACKET_PAYLOAD_ICMP4_SIZE;
2904
2905                         /* set the ip header in the ICMP payload */
2906                         i.ptr = buf;
2907                         i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2908                         i.ip->v4.tos            = 0x00;
2909                         i.ip->v4.packet_length  = htons(wire_len - icmp_hdr_len);
2910                         i.ip->v4.identification = htons(0xFFFF);
2911                         i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2912                         i.ip->v4.fragment       = htons(0x0000);
2913                         i.ip->v4.ttl            = 0xFF;
2914                         i.ip->v4.protocol       = icmp_protocol;
2915                         i.ip->v4.hdr_checksum   = htons(0x0000);
2916                         i.ip->v4.src_addr       = dest_in->sin_addr.s_addr;
2917                         i.ip->v4.dest_addr      = src_in->sin_addr.s_addr;
2918
2919                         buf += SWRAP_PACKET_IP_V4_SIZE;
2920
2921                         src_port = dest_in->sin_port;
2922                         dest_port = src_in->sin_port;
2923                         break;
2924 #ifdef HAVE_IPV6
2925                 case AF_INET6:
2926                         pay->icmp6.type         = 0x01; /* destination unreachable */
2927                         pay->icmp6.code         = 0x03; /* address unreachable */
2928                         pay->icmp6.checksum     = htons(0x0000);
2929                         pay->icmp6.unused       = htonl(0x00000000);
2930                         buf += SWRAP_PACKET_PAYLOAD_ICMP6_SIZE;
2931
2932                         /* set the ip header in the ICMP payload */
2933                         i.ptr = buf;
2934                         i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2935                         i.ip->v6.flow_label_high        = 0x00;
2936                         i.ip->v6.flow_label_low = 0x0000;
2937                         i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2938                         i.ip->v6.next_header    = protocol;
2939                         memcpy(i.ip->v6.src_addr, dest_in6->sin6_addr.s6_addr, 16);
2940                         memcpy(i.ip->v6.dest_addr, src_in6->sin6_addr.s6_addr, 16);
2941
2942                         buf += SWRAP_PACKET_IP_V6_SIZE;
2943
2944                         src_port = dest_in6->sin6_port;
2945                         dest_port = src_in6->sin6_port;
2946                         break;
2947 #endif
2948                 }
2949         }
2950
2951         pay = (union swrap_packet_payload *)(void *)buf;
2952
2953         switch (socket_type) {
2954         case SOCK_STREAM:
2955                 pay->tcp.source_port    = src_port;
2956                 pay->tcp.dest_port      = dest_port;
2957                 pay->tcp.seq_num        = htonl(tcp_seqno);
2958                 pay->tcp.ack_num        = htonl(tcp_ack);
2959                 pay->tcp.hdr_length     = 0x50; /* 5 * 32 bit words */
2960                 pay->tcp.control        = tcp_ctl;
2961                 pay->tcp.window         = htons(0x7FFF);
2962                 pay->tcp.checksum       = htons(0x0000);
2963                 pay->tcp.urg            = htons(0x0000);
2964                 buf += SWRAP_PACKET_PAYLOAD_TCP_SIZE;
2965
2966                 break;
2967
2968         case SOCK_DGRAM:
2969                 pay->udp.source_port    = src_port;
2970                 pay->udp.dest_port      = dest_port;
2971                 pay->udp.length         = htons(8 + payload_len);
2972                 pay->udp.checksum       = htons(0x0000);
2973                 buf += SWRAP_PACKET_PAYLOAD_UDP_SIZE;
2974
2975                 break;
2976         }
2977
2978         if (payload && payload_len > 0) {
2979                 memcpy(buf, payload, payload_len);
2980         }
2981
2982         *_packet_len = packet_len - icmp_truncate_len;
2983         return base;
2984 }
2985
2986 static int swrap_pcap_get_fd(const char *fname)
2987 {
2988         static int fd = -1;
2989
2990         if (fd != -1) {
2991                 return fd;
2992         }
2993
2994         fd = libc_open(fname, O_WRONLY|O_CREAT|O_EXCL|O_APPEND, 0644);
2995         if (fd != -1) {
2996                 struct swrap_file_hdr file_hdr;
2997                 file_hdr.magic          = 0xA1B2C3D4;
2998                 file_hdr.version_major  = 0x0002;
2999                 file_hdr.version_minor  = 0x0004;
3000                 file_hdr.timezone       = 0x00000000;
3001                 file_hdr.sigfigs        = 0x00000000;
3002                 file_hdr.frame_max_len  = SWRAP_FRAME_LENGTH_MAX;
3003                 file_hdr.link_type      = 0x0065; /* 101 RAW IP */
3004
3005                 if (libc_write(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
3006                         libc_close(fd);
3007                         fd = -1;
3008                 }
3009                 return fd;
3010         }
3011
3012         fd = libc_open(fname, O_WRONLY|O_APPEND, 0644);
3013
3014         return fd;
3015 }
3016
3017 static uint8_t *swrap_pcap_marshall_packet(struct socket_info *si,
3018                                            const struct sockaddr *addr,
3019                                            enum swrap_packet_type type,
3020                                            const void *buf, size_t len,
3021                                            size_t *packet_len)
3022 {
3023         const struct sockaddr *src_addr;
3024         const struct sockaddr *dest_addr;
3025         unsigned long tcp_seqno = 0;
3026         unsigned long tcp_ack = 0;
3027         unsigned char tcp_ctl = 0;
3028         int unreachable = 0;
3029
3030         struct timeval tv;
3031
3032         switch (si->family) {
3033         case AF_INET:
3034                 break;
3035 #ifdef HAVE_IPV6
3036         case AF_INET6:
3037                 break;
3038 #endif
3039         default:
3040                 return NULL;
3041         }
3042
3043         switch (type) {
3044         case SWRAP_CONNECT_SEND:
3045                 if (si->type != SOCK_STREAM) {
3046                         return NULL;
3047                 }
3048
3049                 src_addr  = &si->myname.sa.s;
3050                 dest_addr = addr;
3051
3052                 tcp_seqno = si->io.pck_snd;
3053                 tcp_ack = si->io.pck_rcv;
3054                 tcp_ctl = 0x02; /* SYN */
3055
3056                 si->io.pck_snd += 1;
3057
3058                 break;
3059
3060         case SWRAP_CONNECT_RECV:
3061                 if (si->type != SOCK_STREAM) {
3062                         return NULL;
3063                 }
3064
3065                 dest_addr = &si->myname.sa.s;
3066                 src_addr = addr;
3067
3068                 tcp_seqno = si->io.pck_rcv;
3069                 tcp_ack = si->io.pck_snd;
3070                 tcp_ctl = 0x12; /** SYN,ACK */
3071
3072                 si->io.pck_rcv += 1;
3073
3074                 break;
3075
3076         case SWRAP_CONNECT_UNREACH:
3077                 if (si->type != SOCK_STREAM) {
3078                         return NULL;
3079                 }
3080
3081                 dest_addr = &si->myname.sa.s;
3082                 src_addr  = addr;
3083
3084                 /* Unreachable: resend the data of SWRAP_CONNECT_SEND */
3085                 tcp_seqno = si->io.pck_snd - 1;
3086                 tcp_ack = si->io.pck_rcv;
3087                 tcp_ctl = 0x02; /* SYN */
3088                 unreachable = 1;
3089
3090                 break;
3091
3092         case SWRAP_CONNECT_ACK:
3093                 if (si->type != SOCK_STREAM) {
3094                         return NULL;
3095                 }
3096
3097                 src_addr  = &si->myname.sa.s;
3098                 dest_addr = addr;
3099
3100                 tcp_seqno = si->io.pck_snd;
3101                 tcp_ack = si->io.pck_rcv;
3102                 tcp_ctl = 0x10; /* ACK */
3103
3104                 break;
3105
3106         case SWRAP_ACCEPT_SEND:
3107                 if (si->type != SOCK_STREAM) {
3108                         return NULL;
3109                 }
3110
3111                 dest_addr = &si->myname.sa.s;
3112                 src_addr = addr;
3113
3114                 tcp_seqno = si->io.pck_rcv;
3115                 tcp_ack = si->io.pck_snd;
3116                 tcp_ctl = 0x02; /* SYN */
3117
3118                 si->io.pck_rcv += 1;
3119
3120                 break;
3121
3122         case SWRAP_ACCEPT_RECV:
3123                 if (si->type != SOCK_STREAM) {
3124                         return NULL;
3125                 }
3126
3127                 src_addr = &si->myname.sa.s;
3128                 dest_addr = addr;
3129
3130                 tcp_seqno = si->io.pck_snd;
3131                 tcp_ack = si->io.pck_rcv;
3132                 tcp_ctl = 0x12; /* SYN,ACK */
3133
3134                 si->io.pck_snd += 1;
3135
3136                 break;
3137
3138         case SWRAP_ACCEPT_ACK:
3139                 if (si->type != SOCK_STREAM) {
3140                         return NULL;
3141                 }
3142
3143                 dest_addr = &si->myname.sa.s;
3144                 src_addr = addr;
3145
3146                 tcp_seqno = si->io.pck_rcv;
3147                 tcp_ack = si->io.pck_snd;
3148                 tcp_ctl = 0x10; /* ACK */
3149
3150                 break;
3151
3152         case SWRAP_SEND:
3153                 src_addr  = &si->myname.sa.s;
3154                 dest_addr = &si->peername.sa.s;
3155
3156                 tcp_seqno = si->io.pck_snd;
3157                 tcp_ack = si->io.pck_rcv;
3158                 tcp_ctl = 0x18; /* PSH,ACK */
3159
3160                 si->io.pck_snd += len;
3161
3162                 break;
3163
3164         case SWRAP_SEND_RST:
3165                 dest_addr = &si->myname.sa.s;
3166                 src_addr  = &si->peername.sa.s;
3167
3168                 if (si->type == SOCK_DGRAM) {
3169                         return swrap_pcap_marshall_packet(si,
3170                                                           &si->peername.sa.s,
3171                                                           SWRAP_SENDTO_UNREACH,
3172                                                           buf,
3173                                                           len,
3174                                                           packet_len);
3175                 }
3176
3177                 tcp_seqno = si->io.pck_rcv;
3178                 tcp_ack = si->io.pck_snd;
3179                 tcp_ctl = 0x14; /** RST,ACK */
3180
3181                 break;
3182
3183         case SWRAP_PENDING_RST:
3184                 dest_addr = &si->myname.sa.s;
3185                 src_addr  = &si->peername.sa.s;
3186
3187                 if (si->type == SOCK_DGRAM) {
3188                         return NULL;
3189                 }
3190
3191                 tcp_seqno = si->io.pck_rcv;
3192                 tcp_ack = si->io.pck_snd;
3193                 tcp_ctl = 0x14; /* RST,ACK */
3194
3195                 break;
3196
3197         case SWRAP_RECV:
3198                 dest_addr = &si->myname.sa.s;
3199                 src_addr  = &si->peername.sa.s;
3200
3201                 tcp_seqno = si->io.pck_rcv;
3202                 tcp_ack = si->io.pck_snd;
3203                 tcp_ctl = 0x18; /* PSH,ACK */
3204
3205                 si->io.pck_rcv += len;
3206
3207                 break;
3208
3209         case SWRAP_RECV_RST:
3210                 dest_addr = &si->myname.sa.s;
3211                 src_addr  = &si->peername.sa.s;
3212
3213                 if (si->type == SOCK_DGRAM) {
3214                         return NULL;
3215                 }
3216
3217                 tcp_seqno = si->io.pck_rcv;
3218                 tcp_ack = si->io.pck_snd;
3219                 tcp_ctl = 0x14; /* RST,ACK */
3220
3221                 break;
3222
3223         case SWRAP_SENDTO:
3224                 src_addr = &si->myname.sa.s;
3225                 dest_addr = addr;
3226
3227                 si->io.pck_snd += len;
3228
3229                 break;
3230
3231         case SWRAP_SENDTO_UNREACH:
3232                 dest_addr = &si->myname.sa.s;
3233                 src_addr = addr;
3234
3235                 unreachable = 1;
3236
3237                 break;
3238
3239         case SWRAP_RECVFROM:
3240                 dest_addr = &si->myname.sa.s;
3241                 src_addr = addr;
3242
3243                 si->io.pck_rcv += len;
3244
3245                 break;
3246
3247         case SWRAP_CLOSE_SEND:
3248                 if (si->type != SOCK_STREAM) {
3249                         return NULL;
3250                 }
3251
3252                 src_addr  = &si->myname.sa.s;
3253                 dest_addr = &si->peername.sa.s;
3254
3255                 tcp_seqno = si->io.pck_snd;
3256                 tcp_ack = si->io.pck_rcv;
3257                 tcp_ctl = 0x11; /* FIN, ACK */
3258
3259                 si->io.pck_snd += 1;
3260
3261                 break;
3262
3263         case SWRAP_CLOSE_RECV:
3264                 if (si->type != SOCK_STREAM) {
3265                         return NULL;
3266                 }
3267
3268                 dest_addr = &si->myname.sa.s;
3269                 src_addr  = &si->peername.sa.s;
3270
3271                 tcp_seqno = si->io.pck_rcv;
3272                 tcp_ack = si->io.pck_snd;
3273                 tcp_ctl = 0x11; /* FIN,ACK */
3274
3275                 si->io.pck_rcv += 1;
3276
3277                 break;
3278
3279         case SWRAP_CLOSE_ACK:
3280                 if (si->type != SOCK_STREAM) {
3281                         return NULL;
3282                 }
3283
3284                 src_addr  = &si->myname.sa.s;
3285                 dest_addr = &si->peername.sa.s;
3286
3287                 tcp_seqno = si->io.pck_snd;
3288                 tcp_ack = si->io.pck_rcv;
3289                 tcp_ctl = 0x10; /* ACK */
3290
3291                 break;
3292         default:
3293                 return NULL;
3294         }
3295
3296         swrapGetTimeOfDay(&tv);
3297
3298         return swrap_pcap_packet_init(&tv,
3299                                       src_addr,
3300                                       dest_addr,
3301                                       si->type,
3302                                       (const uint8_t *)buf,
3303                                       len,
3304                                       tcp_seqno,
3305                                       tcp_ack,
3306                                       tcp_ctl,
3307                                       unreachable,
3308                                       packet_len);
3309 }
3310
3311 static void swrap_pcap_dump_packet(struct socket_info *si,
3312                                    const struct sockaddr *addr,
3313                                    enum swrap_packet_type type,
3314                                    const void *buf, size_t len)
3315 {
3316         const char *file_name;
3317         uint8_t *packet;
3318         size_t packet_len = 0;
3319         int fd;
3320
3321         swrap_mutex_lock(&pcap_dump_mutex);
3322
3323         file_name = swrap_pcap_init_file();
3324         if (!file_name) {
3325                 goto done;
3326         }
3327
3328         packet = swrap_pcap_marshall_packet(si,
3329                                             addr,
3330                                             type,
3331                                             buf,
3332                                             len,
3333                                             &packet_len);
3334         if (packet == NULL) {
3335                 goto done;
3336         }
3337
3338         fd = swrap_pcap_get_fd(file_name);
3339         if (fd != -1) {
3340                 if (libc_write(fd, packet, packet_len) != (ssize_t)packet_len) {
3341                         free(packet);
3342                         goto done;
3343                 }
3344         }
3345
3346         free(packet);
3347
3348 done:
3349         swrap_mutex_unlock(&pcap_dump_mutex);
3350 }
3351
3352 /****************************************************************************
3353  *   SIGNALFD
3354  ***************************************************************************/
3355
3356 #ifdef HAVE_SIGNALFD
/*
 * Wrapper around libc_signalfd().
 *
 * On success the descriptor that signalfd() handed back is checked for a
 * stale socket_wrapper entry and that entry is removed. The returned
 * descriptor is used for this (not the 'fd' argument): when 'fd' is -1 a
 * brand new descriptor is created and that is the number that may collide
 * with a stale entry; when 'fd' is an existing signalfd the return value
 * is that same descriptor.
 *
 * Returns the signalfd descriptor, or -1 on error (errno from libc).
 */
static int swrap_signalfd(int fd, const sigset_t *mask, int flags)
{
	int rc;

	rc = libc_signalfd(fd, mask, flags);
	if (rc != -1) {
		/* Check if we have a stale fd and remove it */
		swrap_remove_stale(rc);
	}

	return rc;
}
3368
/* Exported signalfd() symbol: forwards to swrap_signalfd(). */
int signalfd(int fd, const sigset_t *mask, int flags)
{
	int sfd = swrap_signalfd(fd, mask, flags);

	return sfd;
}
3373 #endif
3374
3375 /****************************************************************************
3376  *   SOCKET
3377  ***************************************************************************/
3378
3379 static int swrap_socket(int family, int type, int protocol)
3380 {
3381         struct socket_info *si = NULL;
3382         struct socket_info _si = { 0 };
3383         int fd;
3384         int ret;
3385         int real_type = type;
3386
3387         /*
3388          * Remove possible addition flags passed to socket() so
3389          * do not fail checking the type.
3390          * See https://lwn.net/Articles/281965/
3391          */
3392 #ifdef SOCK_CLOEXEC
3393         real_type &= ~SOCK_CLOEXEC;
3394 #endif
3395 #ifdef SOCK_NONBLOCK
3396         real_type &= ~SOCK_NONBLOCK;
3397 #endif
3398
3399         if (!socket_wrapper_enabled()) {
3400                 return libc_socket(family, type, protocol);
3401         }
3402
3403         switch (family) {
3404         case AF_INET:
3405 #ifdef HAVE_IPV6
3406         case AF_INET6:
3407 #endif
3408                 break;
3409 #ifdef AF_NETLINK
3410         case AF_NETLINK:
3411 #endif /* AF_NETLINK */
3412 #ifdef AF_PACKET
3413         case AF_PACKET:
3414 #endif /* AF_PACKET */
3415         case AF_UNIX:
3416                 fd = libc_socket(family, type, protocol);
3417                 if (fd != -1) {
3418                         /* Check if we have a stale fd and remove it */
3419                         swrap_remove_stale(fd);
3420                         SWRAP_LOG(SWRAP_LOG_TRACE,
3421                                   "Unix socket fd=%d",
3422                                   fd);
3423                 }
3424                 return fd;
3425         default:
3426                 errno = EAFNOSUPPORT;
3427                 return -1;
3428         }
3429
3430         switch (real_type) {
3431         case SOCK_STREAM:
3432                 break;
3433         case SOCK_DGRAM:
3434                 break;
3435         default:
3436                 errno = EPROTONOSUPPORT;
3437                 return -1;
3438         }
3439
3440         switch (protocol) {
3441         case 0:
3442                 break;
3443         case 6:
3444                 if (real_type == SOCK_STREAM) {
3445                         break;
3446                 }
3447                 FALL_THROUGH;
3448         case 17:
3449                 if (real_type == SOCK_DGRAM) {
3450                         break;
3451                 }
3452                 FALL_THROUGH;
3453         default:
3454                 errno = EPROTONOSUPPORT;
3455                 return -1;
3456         }
3457
3458         /*
3459          * We must call libc_socket with type, from the caller, not the version
3460          * we removed SOCK_CLOEXEC and SOCK_NONBLOCK from
3461          */
3462         fd = libc_socket(AF_UNIX, type, 0);
3463
3464         if (fd == -1) {
3465                 return -1;
3466         }
3467
3468         /* Check if we have a stale fd and remove it */
3469         swrap_remove_stale(fd);
3470
3471         si = &_si;
3472         si->family = family;
3473
3474         /* however, the rest of the socket_wrapper code expects just
3475          * the type, not the flags */
3476         si->type = real_type;
3477         si->protocol = protocol;
3478
3479         /*
3480          * Setup myname so getsockname() can succeed to find out the socket
3481          * type.
3482          */
3483         switch(si->family) {
3484         case AF_INET: {
3485                 struct sockaddr_in sin = {
3486                         .sin_family = AF_INET,
3487                 };
3488
3489                 si->myname.sa_socklen = sizeof(struct sockaddr_in);
3490                 memcpy(&si->myname.sa.in, &sin, si->myname.sa_socklen);
3491                 break;
3492         }
3493 #ifdef HAVE_IPV6
3494         case AF_INET6: {
3495                 struct sockaddr_in6 sin6 = {
3496                         .sin6_family = AF_INET6,
3497                 };
3498
3499                 si->myname.sa_socklen = sizeof(struct sockaddr_in6);
3500                 memcpy(&si->myname.sa.in6, &sin6, si->myname.sa_socklen);
3501                 break;
3502         }
3503 #endif
3504         default:
3505                 errno = EINVAL;
3506                 return -1;
3507         }
3508
3509         ret = swrap_create_socket(si, fd);
3510         if (ret == -1) {
3511                 int saved_errno = errno;
3512                 libc_close(fd);
3513                 errno = saved_errno;
3514                 return -1;
3515         }
3516
3517         SWRAP_LOG(SWRAP_LOG_TRACE,
3518                   "Created %s socket for protocol %s, fd=%d",
3519                   family == AF_INET ? "IPv4" : "IPv6",
3520                   real_type == SOCK_DGRAM ? "UDP" : "TCP",
3521                   fd);
3522
3523         return fd;
3524 }
3525
/* Exported socket() symbol: forwards to swrap_socket(). */
int socket(int family, int type, int protocol)
{
	int fd = swrap_socket(family, type, protocol);

	return fd;
}
3530
3531 /****************************************************************************
3532  *   SOCKETPAIR
3533  ***************************************************************************/
3534
/*
 * Wrapper around libc_socketpair().
 *
 * If the call did not return -1, both descriptors in sv[] are checked
 * for stale entries, which are removed. The libc return value is passed
 * on unchanged.
 */
static int swrap_socketpair(int family, int type, int protocol, int sv[2])
{
	int idx;
	int rc = libc_socketpair(family, type, protocol, sv);

	if (rc == -1) {
		return rc;
	}

	for (idx = 0; idx < 2; idx++) {
		swrap_remove_stale(sv[idx]);
	}

	return rc;
}
3547
/* Exported socketpair() symbol: forwards to swrap_socketpair(). */
int socketpair(int family, int type, int protocol, int sv[2])
{
	int rc = swrap_socketpair(family, type, protocol, sv);

	return rc;
}
3552
3553 /****************************************************************************
3554  *   TIMERFD_CREATE
3555  ***************************************************************************/
3556
3557 #ifdef HAVE_TIMERFD_CREATE
/*
 * Wrapper around libc_timerfd_create().
 *
 * If a descriptor was returned, it is checked for a stale entry, which
 * is removed. Returns the descriptor, or -1 as reported by libc.
 */
static int swrap_timerfd_create(int clockid, int flags)
{
	int tfd = libc_timerfd_create(clockid, flags);

	if (tfd == -1) {
		return tfd;
	}

	swrap_remove_stale(tfd);

	return tfd;
}
3569
/* Exported timerfd_create() symbol: forwards to swrap_timerfd_create(). */
int timerfd_create(int clockid, int flags)
{
	int tfd = swrap_timerfd_create(clockid, flags);

	return tfd;
}
3574 #endif
3575
3576 /****************************************************************************
3577  *   PIPE
3578  ***************************************************************************/
3579
/*
 * Wrapper around libc_pipe().
 *
 * If the call did not return -1, both pipe ends are checked for stale
 * entries, which are removed. The libc return value is passed on
 * unchanged.
 */
static int swrap_pipe(int pipefd[2])
{
	int end;
	int rc = libc_pipe(pipefd);

	if (rc == -1) {
		return rc;
	}

	for (end = 0; end < 2; end++) {
		swrap_remove_stale(pipefd[end]);
	}

	return rc;
}
3592
/* Exported pipe() symbol: forwards to swrap_pipe(). */
int pipe(int pipefd[2])
{
	int rc = swrap_pipe(pipefd);

	return rc;
}
3597
3598 /****************************************************************************
3599  *   ACCEPT
3600  ***************************************************************************/
3601
3602 static int swrap_accept(int s,
3603                         struct sockaddr *addr,
3604                         socklen_t *addrlen,
3605                         int flags)
3606 {
3607         struct socket_info *parent_si, *child_si;
3608         struct socket_info new_si = { 0 };
3609         int fd;
3610         int idx;
3611         struct swrap_address un_addr = {
3612                 .sa_socklen = sizeof(struct sockaddr_un),
3613         };
3614         struct swrap_address un_my_addr = {
3615                 .sa_socklen = sizeof(struct sockaddr_un),
3616         };
3617         struct swrap_address in_addr = {
3618                 .sa_socklen = sizeof(struct sockaddr_storage),
3619         };
3620         struct swrap_address in_my_addr = {
3621                 .sa_socklen = sizeof(struct sockaddr_storage),
3622         };
3623         int ret;
3624
3625         parent_si = find_socket_info(s);
3626         if (!parent_si) {
3627 #ifdef HAVE_ACCEPT4
3628                 return libc_accept4(s, addr, addrlen, flags);
3629 #else
3630                 UNUSED(flags);
3631                 return libc_accept(s, addr, addrlen);
3632 #endif
3633         }
3634
3635
3636         /*
3637          * prevent parent_si from being altered / closed
3638          * while we read it
3639          */
3640         SWRAP_LOCK_SI(parent_si);
3641
3642         /*
3643          * assume out sockaddr have the same size as the in parent
3644          * socket family
3645          */
3646         in_addr.sa_socklen = socket_length(parent_si->family);
3647         if (in_addr.sa_socklen <= 0) {
3648                 SWRAP_UNLOCK_SI(parent_si);
3649                 errno = EINVAL;
3650                 return -1;
3651         }
3652
3653         SWRAP_UNLOCK_SI(parent_si);
3654
3655 #ifdef HAVE_ACCEPT4
3656         ret = libc_accept4(s, &un_addr.sa.s, &un_addr.sa_socklen, flags);
3657 #else
3658         UNUSED(flags);
3659         ret = libc_accept(s, &un_addr.sa.s, &un_addr.sa_socklen);
3660 #endif
3661         if (ret == -1) {
3662                 int saved_errno = errno;
3663                 if (saved_errno == ENOTSOCK) {
3664                         /* Remove stale fds */
3665                         swrap_remove_stale(s);
3666                 }
3667                 errno = saved_errno;
3668                 return ret;
3669         }
3670
3671         fd = ret;
3672
3673         /* Check if we have a stale fd and remove it */
3674         swrap_remove_stale(fd);
3675
3676         if (un_addr.sa.un.sun_path[0] == '\0') {
3677                 /*
3678                  * FreeBSD seems to have a problem where
3679                  * accept4() on the unix socket doesn't
3680                  * ECONNABORTED for already disconnected connections.
3681                  *
3682                  * Let's try libc_getpeername() to get the peer address
3683                  * as a fallback, but it'll likely return ENOTCONN,
3684                  * which we have to map to ECONNABORTED.
3685                  */
3686                 un_addr.sa_socklen = sizeof(struct sockaddr_un),
3687                 ret = libc_getpeername(fd, &un_addr.sa.s, &un_addr.sa_socklen);
3688                 if (ret == -1) {
3689                         int saved_errno = errno;
3690                         libc_close(fd);
3691                         if (saved_errno == ENOTCONN) {
3692                                 /*
3693                                  * If the connection is already disconnected
3694                                  * we should return ECONNABORTED.
3695                                  */
3696                                 saved_errno = ECONNABORTED;
3697                         }
3698                         errno = saved_errno;
3699                         return ret;
3700                 }
3701         }
3702
3703         ret = libc_getsockname(fd,
3704                                &un_my_addr.sa.s,
3705                                &un_my_addr.sa_socklen);
3706         if (ret == -1) {
3707                 int saved_errno = errno;
3708                 libc_close(fd);
3709                 if (saved_errno == ENOTCONN) {
3710                         /*
3711                          * If the connection is already disconnected
3712                          * we should return ECONNABORTED.
3713                          */
3714                         saved_errno = ECONNABORTED;
3715                 }
3716                 errno = saved_errno;
3717                 return ret;
3718         }
3719
3720         SWRAP_LOCK_SI(parent_si);
3721
3722         ret = sockaddr_convert_from_un(parent_si,
3723                                        &un_addr.sa.un,
3724                                        un_addr.sa_socklen,
3725                                        parent_si->family,
3726                                        &in_addr.sa.s,
3727                                        &in_addr.sa_socklen);
3728         if (ret == -1) {
3729                 int saved_errno = errno;
3730                 SWRAP_UNLOCK_SI(parent_si);
3731                 libc_close(fd);
3732                 errno = saved_errno;
3733                 return ret;
3734         }
3735
3736         child_si = &new_si;
3737
3738         child_si->family = parent_si->family;
3739         child_si->type = parent_si->type;
3740         child_si->protocol = parent_si->protocol;
3741         child_si->bound = 1;
3742         child_si->is_server = 1;
3743         child_si->connected = 1;
3744
3745         SWRAP_UNLOCK_SI(parent_si);
3746
3747         child_si->peername = (struct swrap_address) {
3748                 .sa_socklen = in_addr.sa_socklen,
3749         };
3750         memcpy(&child_si->peername.sa.ss, &in_addr.sa.ss, in_addr.sa_socklen);
3751
3752         if (addr != NULL && addrlen != NULL) {
3753                 size_t copy_len = MIN(*addrlen, in_addr.sa_socklen);
3754                 if (copy_len > 0) {
3755                         memcpy(addr, &in_addr.sa.ss, copy_len);
3756                 }
3757                 *addrlen = in_addr.sa_socklen;
3758         }
3759
3760         ret = sockaddr_convert_from_un(child_si,
3761                                        &un_my_addr.sa.un,
3762                                        un_my_addr.sa_socklen,
3763                                        child_si->family,
3764                                        &in_my_addr.sa.s,
3765                                        &in_my_addr.sa_socklen);
3766         if (ret == -1) {
3767                 int saved_errno = errno;
3768                 libc_close(fd);
3769                 errno = saved_errno;
3770                 return ret;
3771         }
3772
3773         SWRAP_LOG(SWRAP_LOG_TRACE,
3774                   "accept() path=%s, fd=%d",
3775                   un_my_addr.sa.un.sun_path, s);
3776
3777         child_si->myname = (struct swrap_address) {
3778                 .sa_socklen = in_my_addr.sa_socklen,
3779         };
3780         memcpy(&child_si->myname.sa.ss, &in_my_addr.sa.ss, in_my_addr.sa_socklen);
3781
3782         idx = swrap_create_socket(&new_si, fd);
3783         if (idx == -1) {
3784                 int saved_errno = errno;
3785                 libc_close(fd);
3786                 errno = saved_errno;
3787                 return -1;
3788         }
3789
3790         if (addr != NULL) {
3791                 struct socket_info *si = swrap_get_socket_info(idx);
3792
3793                 SWRAP_LOCK_SI(si);
3794                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_SEND, NULL, 0);
3795                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_RECV, NULL, 0);
3796                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_ACK, NULL, 0);
3797                 SWRAP_UNLOCK_SI(si);
3798         }
3799
3800         return fd;
3801 }
3802
3803 #ifdef HAVE_ACCEPT4
/*
 * Public accept4() entry point: hands all arguments, including the caller's
 * flags, to swrap_accept().
 */
int accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags)
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_accept(s, addr, len_ptr, flags);
}
3808 #endif
3809
/*
 * Public accept() entry point. The type of the length argument depends on
 * HAVE_ACCEPT_PSOCKLEN_T; either way it is cast to socklen_t * and passed to
 * swrap_accept() with no flags.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int accept(int s, struct sockaddr *addr, Psocklen_t addrlen)
#else
int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;
	const int no_flags = 0;

	return swrap_accept(s, addr, len_ptr, no_flags);
}
3818
3819 static int autobind_start_init;
3820 static int autobind_start;
3821
3822 /* using sendto() or connect() on an unbound socket would give the
3823    recipient no way to reply, as unlike UDP and TCP, a unix domain
3824    socket can't auto-assign ephemeral port numbers, so we need to
3825    assign it here.
3826    Note: this might change the family from ipv6 to ipv4
3827 */
3828 static int swrap_auto_bind(int fd, struct socket_info *si, int family)
3829 {
3830         struct swrap_address un_addr = {
3831                 .sa_socklen = sizeof(struct sockaddr_un),
3832         };
3833         int i;
3834         char type;
3835         int ret;
3836         int port;
3837         char *swrap_dir = NULL;
3838
3839         swrap_mutex_lock(&autobind_start_mutex);
3840
3841         if (autobind_start_init != 1) {
3842                 autobind_start_init = 1;
3843                 autobind_start = getpid();
3844                 autobind_start %= 50000;
3845                 autobind_start += 10000;
3846         }
3847
3848         un_addr.sa.un.sun_family = AF_UNIX;
3849
3850         switch (family) {
3851         case AF_INET: {
3852                 struct sockaddr_in in;
3853
3854                 switch (si->type) {
3855                 case SOCK_STREAM:
3856                         type = SOCKET_TYPE_CHAR_TCP;
3857                         break;
3858                 case SOCK_DGRAM:
3859                         type = SOCKET_TYPE_CHAR_UDP;
3860                         break;
3861                 default:
3862                         errno = ESOCKTNOSUPPORT;
3863                         ret = -1;
3864                         goto done;
3865                 }
3866
3867                 memset(&in, 0, sizeof(in));
3868                 in.sin_family = AF_INET;
3869                 in.sin_addr.s_addr = htonl(swrap_ipv4_iface(
3870                                            socket_wrapper_default_iface()));
3871
3872                 si->myname = (struct swrap_address) {
3873                         .sa_socklen = sizeof(in),
3874                 };
3875                 memcpy(&si->myname.sa.in, &in, si->myname.sa_socklen);
3876                 break;
3877         }
3878 #ifdef HAVE_IPV6
3879         case AF_INET6: {
3880                 struct sockaddr_in6 in6;
3881
3882                 if (si->family != family) {
3883                         errno = ENETUNREACH;
3884                         ret = -1;
3885                         goto done;
3886                 }
3887
3888                 switch (si->type) {
3889                 case SOCK_STREAM:
3890                         type = SOCKET_TYPE_CHAR_TCP_V6;
3891                         break;
3892                 case SOCK_DGRAM:
3893                         type = SOCKET_TYPE_CHAR_UDP_V6;
3894                         break;
3895                 default:
3896                         errno = ESOCKTNOSUPPORT;
3897                         ret = -1;
3898                         goto done;
3899                 }
3900
3901                 memset(&in6, 0, sizeof(in6));
3902                 in6.sin6_family = AF_INET6;
3903                 in6.sin6_addr = *swrap_ipv6();
3904                 in6.sin6_addr.s6_addr[15] = socket_wrapper_default_iface();
3905
3906                 si->myname = (struct swrap_address) {
3907                         .sa_socklen = sizeof(in6),
3908                 };
3909                 memcpy(&si->myname.sa.in6, &in6, si->myname.sa_socklen);
3910                 break;
3911         }
3912 #endif
3913         default:
3914                 errno = ESOCKTNOSUPPORT;
3915                 ret = -1;
3916                 goto done;
3917         }
3918
3919         if (autobind_start > 60000) {
3920                 autobind_start = 10000;
3921         }
3922
3923         swrap_dir = socket_wrapper_dir();
3924         if (swrap_dir == NULL) {
3925                 errno = EINVAL;
3926                 ret = -1;
3927                 goto done;
3928         }
3929
3930         for (i = 0; i < SOCKET_MAX_SOCKETS; i++) {
3931                 port = autobind_start + i;
3932                 swrap_un_path(&un_addr.sa.un,
3933                               swrap_dir,
3934                               type,
3935                               socket_wrapper_default_iface(),
3936                               port);
3937
3938                 ret = libc_bind(fd, &un_addr.sa.s, un_addr.sa_socklen);
3939                 if (ret == -1) {
3940                         if (errno == EALREADY || errno == EADDRINUSE) {
3941                                 continue;
3942                         }
3943                         goto done;
3944                 }
3945
3946                 si->un_addr = un_addr.sa.un;
3947
3948                 si->bound = 1;
3949                 autobind_start = port + 1;
3950                 break;
3951         }
3952         if (i == SOCKET_MAX_SOCKETS) {
3953                 SWRAP_LOG(SWRAP_LOG_ERROR, "Too many open unix sockets (%u) for "
3954                                            "interface "SOCKET_FORMAT,
3955                                            SOCKET_MAX_SOCKETS,
3956                                            type,
3957                                            socket_wrapper_default_iface(),
3958                                            0);
3959                 errno = ENFILE;
3960                 ret = -1;
3961                 goto done;
3962         }
3963
3964         si->family = family;
3965         set_port(si->family, port, &si->myname);
3966
3967         ret = 0;
3968
3969 done:
3970         SAFE_FREE(swrap_dir);
3971         swrap_mutex_unlock(&autobind_start_mutex);
3972         return ret;
3973 }
3974
3975 /****************************************************************************
3976  *   CONNECT
3977  ***************************************************************************/
3978
3979 static int swrap_connect(int s, const struct sockaddr *serv_addr,
3980                          socklen_t addrlen)
3981 {
3982         int ret;
3983         struct swrap_address un_addr = {
3984                 .sa_socklen = sizeof(struct sockaddr_un),
3985         };
3986         struct socket_info *si = find_socket_info(s);
3987         int bcast = 0;
3988
3989         if (!si) {
3990                 return libc_connect(s, serv_addr, addrlen);
3991         }
3992
3993         SWRAP_LOCK_SI(si);
3994
3995         if (si->bound == 0) {
3996                 ret = swrap_auto_bind(s, si, serv_addr->sa_family);
3997                 if (ret == -1) {
3998                         goto done;
3999                 }
4000         }
4001
4002         if (si->family != serv_addr->sa_family) {
4003                 SWRAP_LOG(SWRAP_LOG_ERROR,
4004                           "called for fd=%d (family=%d) called with invalid family=%d",
4005                           s, si->family, serv_addr->sa_family);
4006                 errno = EINVAL;
4007                 ret = -1;
4008                 goto done;
4009         }
4010
4011         ret = sockaddr_convert_to_un(si, serv_addr,
4012                                      addrlen, &un_addr.sa.un, 0, &bcast);
4013         if (ret == -1) {
4014                 goto done;
4015         }
4016
4017         if (bcast) {
4018                 errno = ENETUNREACH;
4019                 ret = -1;
4020                 goto done;
4021         }
4022
4023         if (si->type == SOCK_DGRAM) {
4024                 si->defer_connect = 1;
4025                 ret = 0;
4026         } else {
4027                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_SEND, NULL, 0);
4028
4029                 ret = libc_connect(s,
4030                                    &un_addr.sa.s,
4031                                    un_addr.sa_socklen);
4032         }
4033
4034         SWRAP_LOG(SWRAP_LOG_TRACE,
4035                   "connect() path=%s, fd=%d",
4036                   un_addr.sa.un.sun_path, s);
4037
4038
4039         /* to give better errors */
4040         if (ret == -1 && errno == ENOENT) {
4041                 errno = EHOSTUNREACH;
4042         }
4043
4044         if (ret == 0) {
4045                 si->peername = (struct swrap_address) {
4046                         .sa_socklen = addrlen,
4047                 };
4048
4049                 memcpy(&si->peername.sa.ss, serv_addr, addrlen);
4050                 si->connected = 1;
4051
4052                 /*
4053                  * When we connect() on a socket than we have to bind the
4054                  * outgoing connection on the interface we use for the
4055                  * transport. We already bound it on the right interface
4056                  * but here we have to update the name so getsockname()
4057                  * returns correct information.
4058                  */
4059                 if (si->bindname.sa_socklen > 0) {
4060                         si->myname = (struct swrap_address) {
4061                                 .sa_socklen = si->bindname.sa_socklen,
4062                         };
4063
4064                         memcpy(&si->myname.sa.ss,
4065                                &si->bindname.sa.ss,
4066                                si->bindname.sa_socklen);
4067
4068                         /* Cleanup bindname */
4069                         si->bindname = (struct swrap_address) {
4070                                 .sa_socklen = 0,
4071                         };
4072                 }
4073
4074                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_RECV, NULL, 0);
4075                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_ACK, NULL, 0);
4076         } else {
4077                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_UNREACH, NULL, 0);
4078         }
4079
4080 done:
4081         SWRAP_UNLOCK_SI(si);
4082         return ret;
4083 }
4084
/* Public connect() entry point; all work is done by swrap_connect(). */
int connect(int s, const struct sockaddr *serv_addr, socklen_t addrlen)
{
	int rc = swrap_connect(s, serv_addr, addrlen);

	return rc;
}
4089
4090 /****************************************************************************
4091  *   BIND
4092  ***************************************************************************/
4093
4094 static int swrap_bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
4095 {
4096         int ret;
4097         struct swrap_address un_addr = {
4098                 .sa_socklen = sizeof(struct sockaddr_un),
4099         };
4100         struct socket_info *si = find_socket_info(s);
4101         int bind_error = 0;
4102 #if 0 /* FIXME */
4103         bool in_use;
4104 #endif
4105
4106         if (!si) {
4107                 return libc_bind(s, myaddr, addrlen);
4108         }
4109
4110         SWRAP_LOCK_SI(si);
4111
4112         switch (si->family) {
4113         case AF_INET: {
4114                 const struct sockaddr_in *sin;
4115                 if (addrlen < sizeof(struct sockaddr_in)) {
4116                         bind_error = EINVAL;
4117                         break;
4118                 }
4119
4120                 sin = (const struct sockaddr_in *)(const void *)myaddr;
4121
4122                 if (sin->sin_family != AF_INET) {
4123                         bind_error = EAFNOSUPPORT;
4124                 }
4125
4126                 /* special case for AF_UNSPEC */
4127                 if (sin->sin_family == AF_UNSPEC &&
4128                     (sin->sin_addr.s_addr == htonl(INADDR_ANY)))
4129                 {
4130                         bind_error = 0;
4131                 }
4132
4133                 break;
4134         }
4135 #ifdef HAVE_IPV6
4136         case AF_INET6: {
4137                 const struct sockaddr_in6 *sin6;
4138                 if (addrlen < sizeof(struct sockaddr_in6)) {
4139                         bind_error = EINVAL;
4140                         break;
4141                 }
4142
4143                 sin6 = (const struct sockaddr_in6 *)(const void *)myaddr;
4144
4145                 if (sin6->sin6_family != AF_INET6) {
4146                         bind_error = EAFNOSUPPORT;
4147                 }
4148
4149                 break;
4150         }
4151 #endif
4152         default:
4153                 bind_error = EINVAL;
4154                 break;
4155         }
4156
4157         if (bind_error != 0) {
4158                 errno = bind_error;
4159                 ret = -1;
4160                 goto out;
4161         }
4162
4163 #if 0 /* FIXME */
4164         in_use = check_addr_port_in_use(myaddr, addrlen);
4165         if (in_use) {
4166                 errno = EADDRINUSE;
4167                 ret = -1;
4168                 goto out;
4169         }
4170 #endif
4171
4172         si->myname.sa_socklen = addrlen;
4173         memcpy(&si->myname.sa.ss, myaddr, addrlen);
4174
4175         ret = sockaddr_convert_to_un(si,
4176                                      myaddr,
4177                                      addrlen,
4178                                      &un_addr.sa.un,
4179                                      1,
4180                                      &si->bcast);
4181         if (ret == -1) {
4182                 goto out;
4183         }
4184
4185         unlink(un_addr.sa.un.sun_path);
4186
4187         ret = libc_bind(s, &un_addr.sa.s, un_addr.sa_socklen);
4188
4189         SWRAP_LOG(SWRAP_LOG_TRACE,
4190                   "bind() path=%s, fd=%d",
4191                   un_addr.sa.un.sun_path, s);
4192
4193         if (ret == 0) {
4194                 si->bound = 1;
4195         }
4196
4197 out:
4198         SWRAP_UNLOCK_SI(si);
4199
4200         return ret;
4201 }
4202
/* Public bind() entry point; all work is done by swrap_bind(). */
int bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
{
	int rc = swrap_bind(s, myaddr, addrlen);

	return rc;
}
4207
4208 /****************************************************************************
4209  *   BINDRESVPORT
4210  ***************************************************************************/
4211
4212 #ifdef HAVE_BINDRESVPORT
4213 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen);
4214
4215 static int swrap_bindresvport_sa(int sd, struct sockaddr *sa)
4216 {
4217         struct swrap_address myaddr = {
4218                 .sa_socklen = sizeof(struct sockaddr_storage),
4219         };
4220         socklen_t salen;
4221         static uint16_t port;
4222         uint16_t i;
4223         int rc = -1;
4224         int af;
4225
4226 #define SWRAP_STARTPORT 600
4227 #define SWRAP_ENDPORT (IPPORT_RESERVED - 1)
4228 #define SWRAP_NPORTS (SWRAP_ENDPORT - SWRAP_STARTPORT + 1)
4229
4230         if (port == 0) {
4231                 port = (getpid() % SWRAP_NPORTS) + SWRAP_STARTPORT;
4232         }
4233
4234         if (sa == NULL) {
4235                 salen = myaddr.sa_socklen;
4236                 sa = &myaddr.sa.s;
4237
4238                 rc = swrap_getsockname(sd, &myaddr.sa.s, &salen);
4239                 if (rc < 0) {
4240                         return -1;
4241                 }
4242
4243                 af = sa->sa_family;
4244                 memset(&myaddr.sa.ss, 0, salen);
4245         } else {
4246                 af = sa->sa_family;
4247         }
4248
4249         for (i = 0; i < SWRAP_NPORTS; i++, port++) {
4250                 switch(af) {
4251                 case AF_INET: {
4252                         struct sockaddr_in *sinp = (struct sockaddr_in *)(void *)sa;
4253
4254                         salen = sizeof(struct sockaddr_in);
4255                         sinp->sin_port = htons(port);
4256                         break;
4257                 }
4258                 case AF_INET6: {
4259                         struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)(void *)sa;
4260
4261                         salen = sizeof(struct sockaddr_in6);
4262                         sin6p->sin6_port = htons(port);
4263                         break;
4264                 }
4265                 default:
4266                         errno = EAFNOSUPPORT;
4267                         return -1;
4268                 }
4269                 sa->sa_family = af;
4270
4271                 if (port > SWRAP_ENDPORT) {
4272                         port = SWRAP_STARTPORT;
4273                 }
4274
4275                 rc = swrap_bind(sd, (struct sockaddr *)sa, salen);
4276                 if (rc == 0 || errno != EADDRINUSE) {
4277                         break;
4278                 }
4279         }
4280
4281         return rc;
4282 }
4283
/*
 * Public bindresvport() entry point: the sockaddr_in pointer is passed on
 * as a generic sockaddr to swrap_bindresvport_sa().
 */
int bindresvport(int sockfd, struct sockaddr_in *sinp)
{
	struct sockaddr *generic = (struct sockaddr *)sinp;

	return swrap_bindresvport_sa(sockfd, generic);
}
4288 #endif
4289
4290 /****************************************************************************
4291  *   LISTEN
4292  ***************************************************************************/
4293
4294 static int swrap_listen(int s, int backlog)
4295 {
4296         int ret;
4297         struct socket_info *si = find_socket_info(s);
4298
4299         if (!si) {
4300                 return libc_listen(s, backlog);
4301         }
4302
4303         SWRAP_LOCK_SI(si);
4304
4305         if (si->bound == 0) {
4306                 ret = swrap_auto_bind(s, si, si->family);
4307                 if (ret == -1) {
4308                         errno = EADDRINUSE;
4309                         goto out;
4310                 }
4311         }
4312
4313         ret = libc_listen(s, backlog);
4314         if (ret == 0) {
4315                 si->listening = 1;
4316         }
4317
4318 out:
4319         SWRAP_UNLOCK_SI(si);
4320
4321         return ret;
4322 }
4323
/* Public listen() entry point; all work is done by swrap_listen(). */
int listen(int s, int backlog)
{
	int rc = swrap_listen(s, backlog);

	return rc;
}
4328
4329 /****************************************************************************
4330  *   FOPEN
4331  ***************************************************************************/
4332
/*
 * fopen() via libc_fopen(). When a stream is returned, its descriptor is
 * passed to swrap_remove_stale() before the stream is handed back.
 */
static FILE *swrap_fopen(const char *name, const char *mode)
{
	FILE *stream = libc_fopen(name, mode);

	if (stream == NULL) {
		return NULL;
	}

	swrap_remove_stale(fileno(stream));

	return stream;
}
4346
/* Public fopen() entry point; all work is done by swrap_fopen(). */
FILE *fopen(const char *name, const char *mode)
{
	FILE *stream = swrap_fopen(name, mode);

	return stream;
}
4351
4352 /****************************************************************************
4353  *   FOPEN64
4354  ***************************************************************************/
4355
4356 #ifdef HAVE_FOPEN64
/*
 * fopen64() via libc_fopen64(). When a stream is returned, its descriptor
 * is passed to swrap_remove_stale() before the stream is handed back.
 */
static FILE *swrap_fopen64(const char *name, const char *mode)
{
	FILE *stream = libc_fopen64(name, mode);

	if (stream == NULL) {
		return NULL;
	}

	swrap_remove_stale(fileno(stream));

	return stream;
}
4370
/* Public fopen64() entry point; all work is done by swrap_fopen64(). */
FILE *fopen64(const char *name, const char *mode)
{
	FILE *stream = swrap_fopen64(name, mode);

	return stream;
}
4375 #endif /* HAVE_FOPEN64 */
4376
4377 /****************************************************************************
4378  *   OPEN
4379  ***************************************************************************/
4380
/*
 * open() via libc_vopen(), taking the variadic tail as a va_list.
 *
 * There are methods for closing descriptors (libc-internal code paths,
 * direct syscalls) that we can't intercept, so every descriptor returned
 * here is passed to swrap_remove_stale() to recover when that has happened.
 */
static int swrap_vopen(const char *pathname, int flags, va_list ap)
{
	int fd = libc_vopen(pathname, flags, ap);

	if (fd == -1) {
		return fd;
	}

	swrap_remove_stale(fd);

	return fd;
}
4397
/*
 * Public open() entry point: packs the variadic tail into a va_list and
 * delegates to swrap_vopen().
 */
int open(const char *pathname, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopen(pathname, flags, args);
	va_end(args);

	return result;
}
4409
4410 /****************************************************************************
4411  *   OPEN64
4412  ***************************************************************************/
4413
4414 #ifdef HAVE_OPEN64
/*
 * open64() via libc_vopen64(), taking the variadic tail as a va_list.
 *
 * There are methods for closing descriptors (libc-internal code paths,
 * direct syscalls) that we can't intercept, so every descriptor returned
 * here is passed to swrap_remove_stale() to recover when that has happened.
 */
static int swrap_vopen64(const char *pathname, int flags, va_list ap)
{
	int fd = libc_vopen64(pathname, flags, ap);

	if (fd == -1) {
		return fd;
	}

	swrap_remove_stale(fd);

	return fd;
}
4431
/*
 * Public open64() entry point: packs the variadic tail into a va_list and
 * delegates to swrap_vopen64().
 */
int open64(const char *pathname, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopen64(pathname, flags, args);
	va_end(args);

	return result;
}
4443 #endif /* HAVE_OPEN64 */
4444
4445 /****************************************************************************
4446  *   OPENAT
4447  ***************************************************************************/
4448
/*
 * openat() via libc_vopenat(), taking the variadic tail as a va_list.
 *
 * There are methods for closing descriptors (libc-internal code paths,
 * direct syscalls) that we can't intercept, so every descriptor returned
 * here is passed to swrap_remove_stale() to recover when that has happened.
 */
static int swrap_vopenat(int dirfd, const char *path, int flags, va_list ap)
{
	int fd = libc_vopenat(dirfd, path, flags, ap);

	if (fd == -1) {
		return fd;
	}

	swrap_remove_stale(fd);

	return fd;
}
4466
/*
 * Public openat() entry point: packs the variadic tail into a va_list and
 * delegates to swrap_vopenat().
 */
int openat(int dirfd, const char *path, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopenat(dirfd, path, flags, args);
	va_end(args);

	return result;
}
4478
4479 /****************************************************************************
4480  *   GETPEERNAME
4481  ***************************************************************************/
4482
4483 static int swrap_getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
4484 {
4485         struct socket_info *si = find_socket_info(s);
4486         socklen_t len;
4487         int ret = -1;
4488
4489         if (!si) {
4490                 return libc_getpeername(s, name, addrlen);
4491         }
4492
4493         SWRAP_LOCK_SI(si);
4494
4495         if (si->peername.sa_socklen == 0)
4496         {
4497                 errno = ENOTCONN;
4498                 goto out;
4499         }
4500
4501         len = MIN(*addrlen, si->peername.sa_socklen);
4502         if (len == 0) {
4503                 ret = 0;
4504                 goto out;
4505         }
4506
4507         memcpy(name, &si->peername.sa.ss, len);
4508         *addrlen = si->peername.sa_socklen;
4509
4510         ret = 0;
4511 out:
4512         SWRAP_UNLOCK_SI(si);
4513
4514         return ret;
4515 }
4516
/*
 * Public getpeername() entry point. The type of the length argument depends
 * on HAVE_ACCEPT_PSOCKLEN_T; either way it is cast to socklen_t * and passed
 * to swrap_getpeername().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getpeername(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_getpeername(s, name, len_ptr);
}
4525
4526 /****************************************************************************
4527  *   GETSOCKNAME
4528  ***************************************************************************/
4529
4530 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
4531 {
4532         struct socket_info *si = find_socket_info(s);
4533         socklen_t len;
4534         int ret = -1;
4535
4536         if (!si) {
4537                 return libc_getsockname(s, name, addrlen);
4538         }
4539
4540         SWRAP_LOCK_SI(si);
4541
4542         len = MIN(*addrlen, si->myname.sa_socklen);
4543         if (len == 0) {
4544                 ret = 0;
4545                 goto out;
4546         }
4547
4548         memcpy(name, &si->myname.sa.ss, len);
4549         *addrlen = si->myname.sa_socklen;
4550
4551         ret = 0;
4552 out:
4553         SWRAP_UNLOCK_SI(si);
4554
4555         return ret;
4556 }
4557
/*
 * Public getsockname() entry point. The type of the length argument depends
 * on HAVE_ACCEPT_PSOCKLEN_T; either way it is cast to socklen_t * and passed
 * to swrap_getsockname().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockname(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_getsockname(s, name, len_ptr);
}
4566
4567 /****************************************************************************
4568  *   GETSOCKOPT
4569  ***************************************************************************/
4570
/* Fall back to SO_PROTOTYPE (the Solaris name) when SO_PROTOCOL is missing. */
#if !defined(SO_PROTOCOL) && defined(SO_PROTOTYPE)
# define SO_PROTOCOL SO_PROTOTYPE
#endif /* !SO_PROTOCOL && SO_PROTOTYPE */
4576
4577 static int swrap_getsockopt(int s, int level, int optname,
4578                             void *optval, socklen_t *optlen)
4579 {
4580         struct socket_info *si = find_socket_info(s);
4581         int ret;
4582
4583         if (!si) {
4584                 return libc_getsockopt(s,
4585                                        level,
4586                                        optname,
4587                                        optval,
4588                                        optlen);
4589         }
4590
4591         SWRAP_LOCK_SI(si);
4592
4593         if (level == SOL_SOCKET) {
4594                 switch (optname) {
4595 #ifdef SO_DOMAIN
4596                 case SO_DOMAIN:
4597                         if (optval == NULL || optlen == NULL ||
4598                             *optlen < (socklen_t)sizeof(int)) {
4599                                 errno = EINVAL;
4600                                 ret = -1;
4601                                 goto done;
4602                         }
4603
4604                         *optlen = sizeof(int);
4605                         *(int *)optval = si->family;
4606                         ret = 0;
4607                         goto done;
4608 #endif /* SO_DOMAIN */
4609
4610 #ifdef SO_PROTOCOL
4611                 case SO_PROTOCOL:
4612                         if (optval == NULL || optlen == NULL ||
4613                             *optlen < (socklen_t)sizeof(int)) {
4614                                 errno = EINVAL;
4615                                 ret = -1;
4616                                 goto done;
4617                         }
4618
4619                         *optlen = sizeof(int);
4620                         *(int *)optval = si->protocol;
4621                         ret = 0;
4622                         goto done;
4623 #endif /* SO_PROTOCOL */
4624                 case SO_TYPE:
4625                         if (optval == NULL || optlen == NULL ||
4626                             *optlen < (socklen_t)sizeof(int)) {
4627                                 errno = EINVAL;
4628                                 ret = -1;
4629                                 goto done;
4630                         }
4631
4632                         *optlen = sizeof(int);
4633                         *(int *)optval = si->type;
4634                         ret = 0;
4635                         goto done;
4636                 default:
4637                         ret = libc_getsockopt(s,
4638                                               level,
4639                                               optname,
4640                                               optval,
4641                                               optlen);
4642                         goto done;
4643                 }
4644         } else if (level == IPPROTO_TCP) {
4645                 switch (optname) {
4646 #ifdef TCP_NODELAY
4647                 case TCP_NODELAY:
4648                         /*
4649                          * This enables sending packets directly out over TCP.
4650                          * As a unix socket is doing that any way, report it as
4651                          * enabled.
4652                          */
4653                         if (optval == NULL || optlen == NULL ||
4654                             *optlen < (socklen_t)sizeof(int)) {
4655                                 errno = EINVAL;
4656                                 ret = -1;
4657                                 goto done;
4658                         }
4659
4660                         *optlen = sizeof(int);
4661                         *(int *)optval = si->tcp_nodelay;
4662
4663                         ret = 0;
4664                         goto done;
4665 #endif /* TCP_NODELAY */
4666 #ifdef TCP_INFO
4667                 case TCP_INFO: {
4668                         struct tcp_info info;
4669                         socklen_t ilen = sizeof(info);
4670
4671 #ifdef HAVE_NETINET_TCP_FSM_H
4672 /* This is FreeBSD */
4673 # define __TCP_LISTEN TCPS_LISTEN
4674 # define __TCP_ESTABLISHED TCPS_ESTABLISHED
4675 # define __TCP_CLOSE TCPS_CLOSED
4676 #else
4677 /* This is Linux */
4678 # define __TCP_LISTEN TCP_LISTEN
4679 # define __TCP_ESTABLISHED TCP_ESTABLISHED
4680 # define __TCP_CLOSE TCP_CLOSE
4681 #endif
4682
4683                         ZERO_STRUCT(info);
4684                         if (si->listening) {
4685                                 info.tcpi_state = __TCP_LISTEN;
4686                         } else if (si->connected) {
4687                                 /*
4688                                  * For now we just fake a few values
4689                                  * supported both by FreeBSD and Linux
4690                                  */
4691                                 info.tcpi_state = __TCP_ESTABLISHED;
4692                                 info.tcpi_rto = 200000;  /* 200 msec */
4693                                 info.tcpi_rtt = 5000;    /* 5 msec */
4694                                 info.tcpi_rttvar = 5000; /* 5 msec */
4695                         } else {
4696                                 info.tcpi_state = __TCP_CLOSE;
4697                                 info.tcpi_rto = 1000000;  /* 1 sec */
4698                                 info.tcpi_rtt = 0;
4699                                 info.tcpi_rttvar = 250000; /* 250 msec */
4700                         }
4701
4702                         if (optval == NULL || optlen == NULL ||
4703                             *optlen < (socklen_t)ilen) {
4704                                 errno = EINVAL;
4705                                 ret = -1;
4706                                 goto done;
4707                         }
4708
4709                         *optlen = ilen;
4710                         memcpy(optval, &info, ilen);
4711
4712                         ret = 0;
4713                         goto done;
4714                 }
4715 #endif /* TCP_INFO */
4716                 default:
4717                         break;
4718                 }
4719         }
4720
4721         errno = ENOPROTOOPT;
4722         ret = -1;
4723
4724 done:
4725         SWRAP_UNLOCK_SI(si);
4726         return ret;
4727 }
4728
/*
 * Public getsockopt() entry point. The prototype depends on whether the
 * platform declares the length argument as Psocklen_t; either way the
 * call is forwarded to swrap_getsockopt() with a socklen_t pointer.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockopt(int s, int level, int optname, void *optval, Psocklen_t optlen)
#else
int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)optlen;

        return swrap_getsockopt(s, level, optname, optval, len_ptr);
}
4737
4738 /****************************************************************************
4739  *   SETSOCKOPT
4740  ***************************************************************************/
4741
4742 static int swrap_setsockopt(int s, int level, int optname,
4743                             const void *optval, socklen_t optlen)
4744 {
4745         struct socket_info *si = find_socket_info(s);
4746         int ret;
4747
4748         if (!si) {
4749                 return libc_setsockopt(s,
4750                                        level,
4751                                        optname,
4752                                        optval,
4753                                        optlen);
4754         }
4755
4756         if (level == SOL_SOCKET) {
4757                 return libc_setsockopt(s,
4758                                        level,
4759                                        optname,
4760                                        optval,
4761                                        optlen);
4762         }
4763
4764         SWRAP_LOCK_SI(si);
4765
4766         if (level == IPPROTO_TCP) {
4767                 switch (optname) {
4768 #ifdef TCP_NODELAY
4769                 case TCP_NODELAY: {
4770                         int i;
4771
4772                         /*
4773                          * This enables sending packets directly out over TCP.
4774                          * A unix socket is doing that any way.
4775                          */
4776                         if (optval == NULL || optlen == 0 ||
4777                             optlen < (socklen_t)sizeof(int)) {
4778                                 errno = EINVAL;
4779                                 ret = -1;
4780                                 goto done;
4781                         }
4782
4783                         i = *discard_const_p(int, optval);
4784                         if (i != 0 && i != 1) {
4785                                 errno = EINVAL;
4786                                 ret = -1;
4787                                 goto done;
4788                         }
4789                         si->tcp_nodelay = i;
4790
4791                         ret = 0;
4792                         goto done;
4793                 }
4794 #endif /* TCP_NODELAY */
4795                 default:
4796                         break;
4797                 }
4798         }
4799
4800         switch (si->family) {
4801         case AF_INET:
4802                 if (level == IPPROTO_IP) {
4803 #ifdef IP_PKTINFO
4804                         if (optname == IP_PKTINFO) {
4805                                 si->pktinfo = AF_INET;
4806                         }
4807 #endif /* IP_PKTINFO */
4808                 }
4809                 ret = 0;
4810                 goto done;
4811 #ifdef HAVE_IPV6
4812         case AF_INET6:
4813                 if (level == IPPROTO_IPV6) {
4814 #ifdef IPV6_RECVPKTINFO
4815                         if (optname == IPV6_RECVPKTINFO) {
4816                                 si->pktinfo = AF_INET6;
4817                         }
4818 #endif /* IPV6_PKTINFO */
4819                 }
4820                 ret = 0;
4821                 goto done;
4822 #endif
4823         default:
4824                 errno = ENOPROTOOPT;
4825                 ret = -1;
4826                 goto done;
4827         }
4828
4829 done:
4830         SWRAP_UNLOCK_SI(si);
4831         return ret;
4832 }
4833
/* Public setsockopt() entry point; all work is done by swrap_setsockopt(). */
int setsockopt(int s, int level, int optname,
               const void *optval, socklen_t optlen)
{
        int rc = swrap_setsockopt(s, level, optname, optval, optlen);

        return rc;
}
4839
4840 /****************************************************************************
4841  *   IOCTL
4842  ***************************************************************************/
4843
4844 static int swrap_vioctl(int s, unsigned long int r, va_list va)
4845 {
4846         struct socket_info *si = find_socket_info(s);
4847         va_list ap;
4848         int *value_ptr = NULL;
4849         int rc;
4850
4851         if (!si) {
4852                 return libc_vioctl(s, r, va);
4853         }
4854
4855         SWRAP_LOCK_SI(si);
4856
4857         va_copy(ap, va);
4858
4859         rc = libc_vioctl(s, r, va);
4860
4861         switch (r) {
4862         case FIONREAD:
4863                 if (rc == 0) {
4864                         value_ptr = ((int *)va_arg(ap, int *));
4865                 }
4866
4867                 if (rc == -1 && errno != EAGAIN && errno != ENOBUFS) {
4868                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4869                 } else if (value_ptr != NULL && *value_ptr == 0) { /* END OF FILE */
4870                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4871                 }
4872                 break;
4873 #ifdef FIONWRITE
4874         case FIONWRITE:
4875                 /* this is FreeBSD */
4876                 FALL_THROUGH; /* to TIOCOUTQ */
4877 #endif /* FIONWRITE */
4878         case TIOCOUTQ: /* same as SIOCOUTQ on Linux */
4879                 /*
4880                  * This may return more bytes then the application
4881                  * sent into the socket, for tcp it should
4882                  * return the number of unacked bytes.
4883                  *
4884                  * On AF_UNIX, all bytes are immediately acked!
4885                  */
4886                 if (rc == 0) {
4887                         value_ptr = ((int *)va_arg(ap, int *));
4888                         *value_ptr = 0;
4889                 }
4890                 break;
4891         }
4892
4893         va_end(ap);
4894
4895         SWRAP_UNLOCK_SI(si);
4896         return rc;
4897 }
4898
/*
 * Public ioctl() entry point. The request argument is declared as int or
 * unsigned long depending on HAVE_IOCTL_INT; it is widened to unsigned
 * long and passed, together with the variadic arguments, to
 * swrap_vioctl().
 */
#ifdef HAVE_IOCTL_INT
int ioctl(int s, int r, ...)
#else
int ioctl(int s, unsigned long int r, ...)
#endif
{
        unsigned long int request;
        va_list args;
        int ret;

        va_start(args, r);
        request = (unsigned long int)r;
        ret = swrap_vioctl(s, request, args);
        va_end(args);

        return ret;
}
4916
4917 /*****************
4918  * CMSG
4919  *****************/
4920
4921 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
4922
/*
 * Fallback for when the system headers do not define CMSG_ALIGN:
 * use the BSD _ALIGN macro if it exists, otherwise round the length up
 * to the next multiple of sizeof(size_t).
 */
#ifndef CMSG_ALIGN
# ifdef _ALIGN /* BSD */
#define CMSG_ALIGN _ALIGN
# else
#define CMSG_ALIGN(len) (((len) + sizeof(size_t) - 1) & ~(sizeof(size_t) - 1))
# endif /* _ALIGN */
#endif /* CMSG_ALIGN */
4930
/**
 * @brief Add a cmsghdr to a msghdr.
 *
 * This is a function to add any type of cmsghdr. It will operate on the
 * msg->msg_control and msg->msg_controllen you pass in by adapting them to
 * the buffer position after the added cmsg element. Hence, this function is
 * intended to be used with an intermediate msghdr and not on the original
 * one handed in by the client.
 *
 * If the remaining control buffer is too small, the element is truncated
 * to the available space and MSG_CTRUNC is set in msg->msg_flags. If there
 * is no control buffer at all (NULL pointer or zero length), nothing is
 * written, MSG_CTRUNC is set and msg_control/msg_controllen stay untouched.
 *
 * The element is written directly into the destination buffer; no
 * intermediate (variable length) staging buffer is used, so the function
 * neither depends on the alignment of a byte array nor on the stack size.
 *
 * @param[in,out] msg   The msghdr to which to add the cmsg.
 *
 * @param[in]  level    The cmsg level to set.
 *
 * @param[in]  type     The cmsg type to set.
 *
 * @param[in]  data     The cmsg data to set.
 *
 * @param[in]  len      the length of the data to set.
 */
static void swrap_msghdr_add_cmsghdr(struct msghdr *msg,
                                     int level,
                                     int type,
                                     const void *data,
                                     size_t len)
{
        /* CMSG_LEN(0) is the offset of the payload within the element */
        const size_t data_offset = CMSG_LEN(0);
        size_t cmlen = CMSG_LEN(len);
        size_t cmspace = CMSG_SPACE(len);
        size_t hdr_bytes;
        struct cmsghdr hdr;
        uint8_t *p = (uint8_t *)msg->msg_control;

        if (msg->msg_controllen < cmlen) {
                cmlen = msg->msg_controllen;
                msg->msg_flags |= MSG_CTRUNC;
        }

        if (msg->msg_controllen < cmspace) {
                cmspace = msg->msg_controllen;
        }

        if (p == NULL || cmspace == 0) {
                /* Nowhere to store anything: report truncation only. */
                msg->msg_flags |= MSG_CTRUNC;
                return;
        }

        /*
         * Build the header in a properly typed object. cmsg_len carries
         * the (possibly truncated) length, as the caller will see it.
         */
        memset(&hdr, 0, sizeof(hdr));
        hdr.cmsg_len = cmlen;
        hdr.cmsg_level = level;
        hdr.cmsg_type = type;

        /*
         * Write the possibly truncated element: header first, then as
         * much payload as fits. Clearing the region up front keeps any
         * padding between header and payload zeroed.
         */
        memset(p, 0, cmlen);
        hdr_bytes = (cmlen < sizeof(hdr)) ? cmlen : sizeof(hdr);
        memcpy(p, &hdr, hdr_bytes);
        if (cmlen > data_offset) {
                memcpy(p + data_offset, data, cmlen - data_offset);
        }

        /*
         * We wrote cmlen bytes, but consume cmspace bytes,
         * leaving the possible trailing padding uninitialized.
         */
        p += cmspace;
        msg->msg_control = p;
        msg->msg_controllen -= cmspace;
}
4996
4997 static int swrap_msghdr_add_pktinfo(struct socket_info *si,
4998                                     struct msghdr *msg)
4999 {
5000         /* Add packet info */
5001         switch (si->pktinfo) {
5002 #if defined(IP_PKTINFO) && (defined(HAVE_STRUCT_IN_PKTINFO) || defined(IP_RECVDSTADDR))
5003         case AF_INET: {
5004                 struct sockaddr_in *sin;
5005 #if defined(HAVE_STRUCT_IN_PKTINFO)
5006                 struct in_pktinfo pkt;
5007 #elif defined(IP_RECVDSTADDR)
5008                 struct in_addr pkt;
5009 #endif
5010
5011                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in)) {
5012                         sin = &si->bindname.sa.in;
5013                 } else {
5014                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in)) {
5015                                 return 0;
5016                         }
5017                         sin = &si->myname.sa.in;
5018                 }
5019
5020                 ZERO_STRUCT(pkt);
5021
5022 #if defined(HAVE_STRUCT_IN_PKTINFO)
5023                 pkt.ipi_ifindex = socket_wrapper_default_iface();
5024                 pkt.ipi_addr.s_addr = sin->sin_addr.s_addr;
5025 #elif defined(IP_RECVDSTADDR)
5026                 pkt = sin->sin_addr;
5027 #endif
5028
5029                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IP, IP_PKTINFO,
5030                                          &pkt, sizeof(pkt));
5031
5032                 break;
5033         }
5034 #endif /* IP_PKTINFO */
5035 #if defined(HAVE_IPV6)
5036         case AF_INET6: {
5037 #if defined(IPV6_PKTINFO) && defined(HAVE_STRUCT_IN6_PKTINFO)
5038                 struct sockaddr_in6 *sin6;
5039                 struct in6_pktinfo pkt6;
5040
5041                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in6)) {
5042                         sin6 = &si->bindname.sa.in6;
5043                 } else {
5044                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in6)) {
5045                                 return 0;
5046                         }
5047                         sin6 = &si->myname.sa.in6;
5048                 }
5049
5050                 ZERO_STRUCT(pkt6);
5051
5052                 pkt6.ipi6_ifindex = socket_wrapper_default_iface();
5053                 pkt6.ipi6_addr = sin6->sin6_addr;
5054
5055                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IPV6, IPV6_PKTINFO,
5056                                         &pkt6, sizeof(pkt6));
5057 #endif /* HAVE_STRUCT_IN6_PKTINFO */
5058
5059                 break;
5060         }
5061 #endif /* IPV6_PKTINFO */
5062         default:
5063                 return -1;
5064         }
5065
5066         return 0;
5067 }
5068
5069 static int swrap_msghdr_add_socket_info(struct socket_info *si,
5070                                         struct msghdr *omsg)
5071 {
5072         int rc = 0;
5073
5074         if (si->pktinfo > 0) {
5075                 rc = swrap_msghdr_add_pktinfo(si, omsg);
5076         }
5077
5078         return rc;
5079 }
5080
5081 static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
5082                                    uint8_t **cm_data,
5083                                    size_t *cm_data_space);
5084 static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
5085                                                 uint8_t **cm_data,
5086                                                 size_t *cm_data_space);
5087 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5088                                                 uint8_t **cm_data,
5089                                                 size_t *cm_data_space);
5090
5091 static int swrap_sendmsg_filter_cmsghdr(const struct msghdr *_msg,
5092                                         uint8_t **cm_data,
5093                                         size_t *cm_data_space)
5094 {
5095         struct msghdr *msg = discard_const_p(struct msghdr, _msg);
5096         struct cmsghdr *cmsg;
5097         int rc = -1;
5098
5099         /* Nothing to do */
5100         if (msg->msg_controllen == 0 || msg->msg_control == NULL) {
5101                 return 0;
5102         }
5103
5104         for (cmsg = CMSG_FIRSTHDR(msg);
5105              cmsg != NULL;
5106              cmsg = CMSG_NXTHDR(msg, cmsg)) {
5107                 switch (cmsg->cmsg_level) {
5108                 case IPPROTO_IP:
5109                         rc = swrap_sendmsg_filter_cmsg_ipproto_ip(cmsg,
5110                                                                   cm_data,
5111                                                                   cm_data_space);
5112                         break;
5113                 case SOL_SOCKET:
5114                         rc = swrap_sendmsg_filter_cmsg_sol_socket(cmsg,
5115                                                                   cm_data,
5116                                                                   cm_data_space);
5117                         break;
5118                 default:
5119                         rc = swrap_sendmsg_copy_cmsg(cmsg,
5120                                                      cm_data,
5121                                                      cm_data_space);
5122                         break;
5123                 }
5124                 if (rc < 0) {
5125                         int saved_errno = errno;
5126                         SAFE_FREE(*cm_data);
5127                         *cm_data_space = 0;
5128                         errno = saved_errno;
5129                         return rc;
5130                 }
5131         }
5132
5133         return rc;
5134 }
5135
/*
 * Append a copy of cmsg to the growing control buffer.
 *
 * The buffer (*cm_data, currently *cm_data_space bytes) is enlarged by
 * CMSG_ALIGN(cmsg->cmsg_len) bytes with realloc(). The control message
 * is copied to the new tail and the alignment padding behind it is
 * zeroed, so no uninitialized heap bytes end up in the buffer.
 *
 * On success *cm_data and *cm_data_space are updated and 0 is returned.
 * If realloc() fails, -1 is returned and both are left unchanged (the
 * old buffer stays valid and owned by the caller).
 */
static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
                                   uint8_t **cm_data,
                                   size_t *cm_data_space)
{
        const size_t cmsg_len = cmsg->cmsg_len;
        const size_t aligned_len = CMSG_ALIGN(cmsg_len);
        size_t cmspace;
        uint8_t *p;

        cmspace = *cm_data_space + aligned_len;

        p = realloc((*cm_data), cmspace);
        if (p == NULL) {
                return -1;
        }
        (*cm_data) = p;

        p = (*cm_data) + (*cm_data_space);
        *cm_data_space = cmspace;

        memcpy(p, cmsg, cmsg_len);
        if (aligned_len > cmsg_len) {
                /* realloc() does not clear memory: wipe the padding */
                memset(p + cmsg_len, 0, aligned_len - cmsg_len);
        }

        return 0;
}
5158
5159 static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
5160                                             uint8_t **cm_data,
5161                                             size_t *cm_data_space);
5162
5163
/*
 * Filter for control messages with level IPPROTO_IP.
 *
 * IP_PKTINFO and IPV6_PKTINFO (where defined) are passed to
 * swrap_sendmsg_filter_cmsg_pktinfo(); every other type yields -1.
 */
static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
                                                uint8_t **cm_data,
                                                size_t *cm_data_space)
{
        switch (cmsg->cmsg_type) {
#ifdef IP_PKTINFO
        case IP_PKTINFO:
                return swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
                                                         cm_data,
                                                         cm_data_space);
#endif
#ifdef IPV6_PKTINFO
        case IPV6_PKTINFO:
                return swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
                                                         cm_data,
                                                         cm_data_space);
#endif
        default:
                break;
        }

        /* unsupported type */
        return -1;
}
5191
/*
 * Filter for pktinfo control messages: the message is dropped, i.e.
 * nothing is appended to the output buffer, and 0 is returned.
 *
 * Passing a IP pktinfo to a unix socket might be rejected by the
 * Kernel, at least on FreeBSD. So skip this cmsg.
 */
static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
                                             uint8_t **cm_data,
                                             size_t *cm_data_space)
{
        /* none of the arguments is needed to skip the message */
        (void)cm_data_space;
        (void)cm_data;
        (void)cmsg;

        return 0;
}
5206
5207 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5208                                                 uint8_t **cm_data,
5209                                                 size_t *cm_data_space)
5210 {
5211         int rc = -1;
5212
5213         switch (cmsg->cmsg_type) {
5214         case SCM_RIGHTS:
5215                 SWRAP_LOG(SWRAP_LOG_TRACE,
5216                           "Ignoring SCM_RIGHTS on inet socket!");
5217                 rc = 0;
5218                 break;
5219 #ifdef SCM_CREDENTIALS
5220         case SCM_CREDENTIALS:
5221                 SWRAP_LOG(SWRAP_LOG_TRACE,
5222                           "Ignoring SCM_CREDENTIALS on inet socket!");
5223                 rc = 0;
5224                 break;
5225 #endif /* SCM_CREDENTIALS */
5226         default:
5227                 rc = swrap_sendmsg_copy_cmsg(cmsg,
5228                                              cm_data,
5229                                              cm_data_space);
5230                 break;
5231         }
5232
5233         return rc;
5234 }
5235
/*
 * Value stored in the 'magic' member of struct swrap_unix_scm_rights
 * by swrap_sendmsg_unix_scm_rights().
 */
static const uint64_t swrap_unix_scm_right_magic = 0x8e0e13f27c42fc36;

/*
 * We only allow up to 6 fds at a time
 * as that's more than enough for Samba
 * and it means we can keep the logic simple
 * and work with fixed size arrays.
 *
 * We also keep sizeof(struct swrap_unix_scm_rights)
 * under PIPE_BUF (4096) in order to allow a non-blocking
 * write into the pipe.
 */
/* Fallback in case the system headers do not provide PIPE_BUF. */
#ifndef PIPE_BUF
#define PIPE_BUF 4096
#endif
/* Maximum number of fds accepted in a single SCM_RIGHTS message. */
#define SWRAP_MAX_PASSED_FDS ((size_t)6)
/* One socket_info slot is reserved per passed fd. */
#define SWRAP_MAX_PASSED_SOCKET_INFO SWRAP_MAX_PASSED_FDS
/*
 * Description of the fds passed in one SCM_RIGHTS message, as filled in
 * by swrap_sendmsg_unix_scm_rights().
 */
struct swrap_unix_scm_rights_payload {
        /* number of entries used in idxs[] (one per passed fd) */
        uint8_t num_idxs;
        /*
         * Per passed fd: index into infos[], or -1 when no socket_info
         * index was found for the fd.
         */
        int8_t idxs[SWRAP_MAX_PASSED_FDS];
        /* copies of the socket_info of the passed sockets (fd_passed reset to 0) */
        struct socket_info infos[SWRAP_MAX_PASSED_SOCKET_INFO];
};
/*
 * Header plus payload describing passed fds; swrap_sendmsg_unix_scm_rights()
 * zeroes the whole structure before filling it in.
 */
struct swrap_unix_scm_rights {
        /* set to swrap_unix_scm_right_magic */
        uint64_t magic;
        /* copy of SOCKET_WRAPPER_PACKAGE */
        char package_name[sizeof(SOCKET_WRAPPER_PACKAGE)];
        /* copy of SOCKET_WRAPPER_VERSION */
        char package_version[sizeof(SOCKET_WRAPPER_VERSION)];
        /* sizeof(struct swrap_unix_scm_rights) as seen by the sender */
        uint32_t full_size;
        /* sizeof(payload) as seen by the sender */
        uint32_t payload_size;
        struct swrap_unix_scm_rights_payload payload;
};
5266
5267 static void swrap_dec_fd_passed_array(size_t num, struct socket_info **array)
5268 {
5269         int saved_errno = errno;
5270         size_t i;
5271
5272         for (i = 0; i < num; i++) {
5273                 struct socket_info *si = array[i];
5274                 if (si == NULL) {
5275                         continue;
5276                 }
5277
5278                 SWRAP_LOCK_SI(si);
5279                 swrap_dec_refcount(si);
5280                 if (si->fd_passed > 0) {
5281                         si->fd_passed -= 1;
5282                 }
5283                 SWRAP_UNLOCK_SI(si);
5284                 array[i] = NULL;
5285         }
5286
5287         errno = saved_errno;
5288 }
5289
5290 static void swrap_undo_si_idx_array(size_t num, int *array)
5291 {
5292         int saved_errno = errno;
5293         size_t i;
5294
5295         swrap_mutex_lock(&first_free_mutex);
5296
5297         for (i = 0; i < num; i++) {
5298                 struct socket_info *si = NULL;
5299
5300                 if (array[i] == -1) {
5301                         continue;
5302                 }
5303
5304                 si = swrap_get_socket_info(array[i]);
5305                 if (si == NULL) {
5306                         continue;
5307                 }
5308
5309                 SWRAP_LOCK_SI(si);
5310                 swrap_dec_refcount(si);
5311                 SWRAP_UNLOCK_SI(si);
5312
5313                 swrap_set_next_free(si, first_free);
5314                 first_free = array[i];
5315                 array[i] = -1;
5316         }
5317
5318         swrap_mutex_unlock(&first_free_mutex);
5319         errno = saved_errno;
5320 }
5321
/*
 * Close every descriptor in array[0..num) that is not -1 using
 * libc_close(). The array itself is not modified and errno is preserved.
 */
static void swrap_close_fd_array(size_t num, const int *array)
{
        const int saved_errno = errno;
        size_t pos = 0;

        while (pos < num) {
                const int fd = array[pos];

                if (fd != -1) {
                        libc_close(fd);
                }
                pos++;
        }

        errno = saved_errno;
}
5336
/*
 * Two views of one pointer value: swrap_sendmsg_unix_scm_rights() stores
 * the CMSG_DATA() byte pointer in .p and reads the fd array through .fds
 * instead of casting the pointer directly.
 */
union __swrap_fds {
        const uint8_t *p;
        int *fds;
};
5341
/*
 * Two views of one pointer value: swrap_sendmsg_unix_scm_rights() stores
 * a byte pointer into the control buffer in .p and accesses the same
 * location as a control message header through .cmsg.
 */
union __swrap_cmsghdr {
        const uint8_t *p;
        struct cmsghdr *cmsg;
};
5346
5347 static int swrap_sendmsg_unix_scm_rights(struct cmsghdr *cmsg,
5348                                          uint8_t **cm_data,
5349                                          size_t *cm_data_space,
5350                                          int *scm_rights_pipe_fd)
5351 {
5352         struct swrap_unix_scm_rights info;
5353         struct swrap_unix_scm_rights_payload *payload = NULL;
5354         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5355         struct socket_info *si_array[SWRAP_MAX_PASSED_FDS] = { NULL, };
5356         size_t info_idx = 0;
5357         size_t size_fds_in;
5358         size_t num_fds_in;
5359         union __swrap_fds __fds_in = { .p = NULL, };
5360         const int *fds_in = NULL;
5361         size_t num_fds_out;
5362         size_t size_fds_out;
5363         union __swrap_fds __fds_out = { .p = NULL, };
5364         int *fds_out = NULL;
5365         size_t cmsg_len;
5366         size_t cmsg_space;
5367         size_t new_cm_data_space;
5368         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5369         struct cmsghdr *new_cmsg = NULL;
5370         uint8_t *p = NULL;
5371         size_t i;
5372         int pipefd[2] = { -1, -1 };
5373         int rc;
5374         ssize_t sret;
5375
5376         /*
5377          * We pass this a buffer to the kernel make sure any padding
5378          * is also cleared.
5379          */
5380         ZERO_STRUCT(info);
5381         info.magic = swrap_unix_scm_right_magic;
5382         memcpy(info.package_name,
5383                SOCKET_WRAPPER_PACKAGE,
5384                sizeof(info.package_name));
5385         memcpy(info.package_version,
5386                SOCKET_WRAPPER_VERSION,
5387                sizeof(info.package_version));
5388         info.full_size = sizeof(info);
5389         info.payload_size = sizeof(info.payload);
5390         payload = &info.payload;
5391
5392         if (*scm_rights_pipe_fd != -1) {
5393                 SWRAP_LOG(SWRAP_LOG_ERROR,
5394                           "Two SCM_RIGHTS headers are not supported by socket_wrapper");
5395                 errno = EINVAL;
5396                 return -1;
5397         }
5398
5399         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5400                 SWRAP_LOG(SWRAP_LOG_ERROR,
5401                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5402                           (size_t)cmsg->cmsg_len,
5403                           CMSG_LEN(0));
5404                 errno = EINVAL;
5405                 return -1;
5406         }
5407         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5408         if ((size_fds_in % sizeof(int)) != 0) {
5409                 SWRAP_LOG(SWRAP_LOG_ERROR,
5410                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5411                           (size_t)cmsg->cmsg_len,
5412                           size_fds_in,
5413                           sizeof(int));
5414                 errno = EINVAL;
5415                 return -1;
5416         }
5417         num_fds_in = size_fds_in / sizeof(int);
5418         if (num_fds_in > SWRAP_MAX_PASSED_FDS) {
5419                 SWRAP_LOG(SWRAP_LOG_ERROR,
5420                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5421                           "num_fds_in=%zu > "
5422                           "SWRAP_MAX_PASSED_FDS(%zu)",
5423                           (size_t)cmsg->cmsg_len,
5424                           size_fds_in,
5425                           num_fds_in,
5426                           SWRAP_MAX_PASSED_FDS);
5427                 errno = EINVAL;
5428                 return -1;
5429         }
5430         if (num_fds_in == 0) {
5431                 SWRAP_LOG(SWRAP_LOG_ERROR,
5432                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5433                           "num_fds_in=%zu",
5434                           (size_t)cmsg->cmsg_len,
5435                           size_fds_in,
5436                           num_fds_in);
5437                 errno = EINVAL;
5438                 return -1;
5439         }
5440         __fds_in.p = CMSG_DATA(cmsg);
5441         fds_in = __fds_in.fds;
5442         num_fds_out = num_fds_in + 1;
5443
5444         SWRAP_LOG(SWRAP_LOG_TRACE,
5445                   "num_fds_in=%zu num_fds_out=%zu",
5446                   num_fds_in, num_fds_out);
5447
5448         size_fds_out = sizeof(int) * num_fds_out;
5449         cmsg_len = CMSG_LEN(size_fds_out);
5450         cmsg_space = CMSG_SPACE(size_fds_out);
5451
5452         new_cm_data_space = *cm_data_space + cmsg_space;
5453
5454         p = realloc((*cm_data), new_cm_data_space);
5455         if (p == NULL) {
5456                 return -1;
5457         }
5458         (*cm_data) = p;
5459         p = (*cm_data) + (*cm_data_space);
5460         memset(p, 0, cmsg_space);
5461         __new_cmsg.p = p;
5462         new_cmsg = __new_cmsg.cmsg;
5463         *new_cmsg = *cmsg;
5464         __fds_out.p = CMSG_DATA(new_cmsg);
5465         fds_out = __fds_out.fds;
5466         memcpy(fds_out, fds_in, size_fds_in);
5467         new_cmsg->cmsg_len = cmsg->cmsg_len;
5468
5469         for (i = 0; i < num_fds_in; i++) {
5470                 size_t j;
5471
5472                 payload->idxs[i] = -1;
5473                 payload->num_idxs++;
5474
5475                 si_idx_array[i] = find_socket_info_index(fds_in[i]);
5476                 if (si_idx_array[i] == -1) {
5477                         continue;
5478                 }
5479
5480                 si_array[i] = swrap_get_socket_info(si_idx_array[i]);
5481                 if (si_array[i] == NULL) {
5482                         SWRAP_LOG(SWRAP_LOG_ERROR,
5483                                   "fds_in[%zu]=%d si_idx_array[%zu]=%d missing!",
5484                                   i, fds_in[i], i, si_idx_array[i]);
5485                         errno = EINVAL;
5486                         return -1;
5487                 }
5488
5489                 for (j = 0; j < i; j++) {
5490                         if (si_array[j] == si_array[i]) {
5491                                 payload->idxs[i] = payload->idxs[j];
5492                                 break;
5493                         }
5494                 }
5495                 if (payload->idxs[i] == -1) {
5496                         if (info_idx >= SWRAP_MAX_PASSED_SOCKET_INFO) {
5497                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5498                                           "fds_in[%zu]=%d,si_idx_array[%zu]=%d: "
5499                                           "info_idx=%zu >= SWRAP_MAX_PASSED_FDS(%zu)!",
5500                                           i, fds_in[i], i, si_idx_array[i],
5501                                           info_idx,
5502                                           SWRAP_MAX_PASSED_SOCKET_INFO);
5503                                 errno = EINVAL;
5504                                 return -1;
5505                         }
5506                         payload->idxs[i] = info_idx;
5507                         info_idx += 1;
5508                         continue;
5509                 }
5510         }
5511
5512         for (i = 0; i < num_fds_in; i++) {
5513                 struct socket_info *si = si_array[i];
5514
5515                 if (si == NULL) {
5516                         SWRAP_LOG(SWRAP_LOG_TRACE,
5517                                   "fds_in[%zu]=%d not an inet socket",
5518                                   i, fds_in[i]);
5519                         continue;
5520                 }
5521
5522                 SWRAP_LOG(SWRAP_LOG_TRACE,
5523                           "fds_in[%zu]=%d si_idx_array[%zu]=%d "
5524                           "passing as info.idxs[%zu]=%d!",
5525                           i, fds_in[i],
5526                           i, si_idx_array[i],
5527                           i, payload->idxs[i]);
5528
5529                 SWRAP_LOCK_SI(si);
5530                 si->fd_passed += 1;
5531                 payload->infos[payload->idxs[i]] = *si;
5532                 payload->infos[payload->idxs[i]].fd_passed = 0;
5533                 SWRAP_UNLOCK_SI(si);
5534         }
5535
5536         rc = pipe(pipefd);
5537         if (rc == -1) {
5538                 int saved_errno = errno;
5539                 SWRAP_LOG(SWRAP_LOG_ERROR,
5540                           "pipe() failed - %d %s",
5541                           saved_errno,
5542                           strerror(saved_errno));
5543                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5544                 errno = saved_errno;
5545                 return -1;
5546         }
5547
5548         sret = libc_write(pipefd[1], &info, sizeof(info));
5549         if (sret != sizeof(info)) {
5550                 int saved_errno = errno;
5551                 if (sret != -1) {
5552                         saved_errno = EINVAL;
5553                 }
5554                 SWRAP_LOG(SWRAP_LOG_ERROR,
5555                           "write() failed - sret=%zd - %d %s",
5556                           sret, saved_errno,
5557                           strerror(saved_errno));
5558                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5559                 libc_close(pipefd[1]);
5560                 libc_close(pipefd[0]);
5561                 errno = saved_errno;
5562                 return -1;
5563         }
5564         libc_close(pipefd[1]);
5565
5566         /*
5567          * Add the pipe read end to the end of the passed fd array
5568          */
5569         fds_out[num_fds_in] = pipefd[0];
5570         new_cmsg->cmsg_len = cmsg_len;
5571
5572         /* we're done ... */
5573         *scm_rights_pipe_fd = pipefd[0];
5574         *cm_data_space = new_cm_data_space;
5575
5576         return 0;
5577 }
5578
/*
 * Route one SOL_SOCKET control message of an outgoing unix-socket message.
 *
 * SCM_RIGHTS messages are handed to swrap_sendmsg_unix_scm_rights(), which
 * may also report a pipe descriptor through scm_rights_pipe_fd; every other
 * type is passed to swrap_sendmsg_copy_cmsg().
 *
 * Returns the result of the selected handler.
 */
static int swrap_sendmsg_unix_sol_socket(struct cmsghdr *cmsg,
                                         uint8_t **cm_data,
                                         size_t *cm_data_space,
                                         int *scm_rights_pipe_fd)
{
        if (cmsg->cmsg_type == SCM_RIGHTS) {
                return swrap_sendmsg_unix_scm_rights(cmsg,
                                                     cm_data,
                                                     cm_data_space,
                                                     scm_rights_pipe_fd);
        }

        /* Anything that is not descriptor passing is forwarded as is. */
        return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5602
5603 static int swrap_recvmsg_unix_scm_rights(struct cmsghdr *cmsg,
5604                                          uint8_t **cm_data,
5605                                          size_t *cm_data_space)
5606 {
5607         int scm_rights_pipe_fd = -1;
5608         struct swrap_unix_scm_rights info;
5609         struct swrap_unix_scm_rights_payload *payload = NULL;
5610         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5611         size_t size_fds_in;
5612         size_t num_fds_in;
5613         union __swrap_fds __fds_in = { .p = NULL, };
5614         const int *fds_in = NULL;
5615         size_t num_fds_out;
5616         size_t size_fds_out;
5617         union __swrap_fds __fds_out = { .p = NULL, };
5618         int *fds_out = NULL;
5619         size_t cmsg_len;
5620         size_t cmsg_space;
5621         size_t new_cm_data_space;
5622         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5623         struct cmsghdr *new_cmsg = NULL;
5624         uint8_t *p = NULL;
5625         ssize_t sret;
5626         size_t i;
5627         int cmp;
5628
5629         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5630                 SWRAP_LOG(SWRAP_LOG_ERROR,
5631                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5632                           (size_t)cmsg->cmsg_len,
5633                           CMSG_LEN(0));
5634                 errno = EINVAL;
5635                 return -1;
5636         }
5637         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5638         if ((size_fds_in % sizeof(int)) != 0) {
5639                 SWRAP_LOG(SWRAP_LOG_ERROR,
5640                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5641                           (size_t)cmsg->cmsg_len,
5642                           size_fds_in,
5643                           sizeof(int));
5644                 errno = EINVAL;
5645                 return -1;
5646         }
5647         num_fds_in = size_fds_in / sizeof(int);
5648         if (num_fds_in > (SWRAP_MAX_PASSED_FDS + 1)) {
5649                 SWRAP_LOG(SWRAP_LOG_ERROR,
5650                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5651                           "num_fds_in=%zu > SWRAP_MAX_PASSED_FDS+1(%zu)",
5652                           (size_t)cmsg->cmsg_len,
5653                           size_fds_in,
5654                           num_fds_in,
5655                           SWRAP_MAX_PASSED_FDS+1);
5656                 errno = EINVAL;
5657                 return -1;
5658         }
5659         if (num_fds_in <= 1) {
5660                 SWRAP_LOG(SWRAP_LOG_ERROR,
5661                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5662                           "num_fds_in=%zu",
5663                           (size_t)cmsg->cmsg_len,
5664                           size_fds_in,
5665                           num_fds_in);
5666                 errno = EINVAL;
5667                 return -1;
5668         }
5669         __fds_in.p = CMSG_DATA(cmsg);
5670         fds_in = __fds_in.fds;
5671         num_fds_out = num_fds_in - 1;
5672
5673         SWRAP_LOG(SWRAP_LOG_TRACE,
5674                   "num_fds_in=%zu num_fds_out=%zu",
5675                   num_fds_in, num_fds_out);
5676
5677         for (i = 0; i < num_fds_in; i++) {
5678                 /* Check if we have a stale fd and remove it */
5679                 swrap_remove_stale(fds_in[i]);
5680         }
5681
5682         scm_rights_pipe_fd = fds_in[num_fds_out];
5683         size_fds_out = sizeof(int) * num_fds_out;
5684         cmsg_len = CMSG_LEN(size_fds_out);
5685         cmsg_space = CMSG_SPACE(size_fds_out);
5686
5687         new_cm_data_space = *cm_data_space + cmsg_space;
5688
5689         p = realloc((*cm_data), new_cm_data_space);
5690         if (p == NULL) {
5691                 swrap_close_fd_array(num_fds_in, fds_in);
5692                 return -1;
5693         }
5694         (*cm_data) = p;
5695         p = (*cm_data) + (*cm_data_space);
5696         memset(p, 0, cmsg_space);
5697         __new_cmsg.p = p;
5698         new_cmsg = __new_cmsg.cmsg;
5699         *new_cmsg = *cmsg;
5700         __fds_out.p = CMSG_DATA(new_cmsg);
5701         fds_out = __fds_out.fds;
5702         memcpy(fds_out, fds_in, size_fds_out);
5703         new_cmsg->cmsg_len = cmsg_len;
5704
5705         sret = read(scm_rights_pipe_fd, &info, sizeof(info));
5706         if (sret != sizeof(info)) {
5707                 int saved_errno = errno;
5708                 if (sret != -1) {
5709                         saved_errno = EINVAL;
5710                 }
5711                 SWRAP_LOG(SWRAP_LOG_ERROR,
5712                           "read() failed - sret=%zd - %d %s",
5713                           sret, saved_errno,
5714                           strerror(saved_errno));
5715                 swrap_close_fd_array(num_fds_in, fds_in);
5716                 errno = saved_errno;
5717                 return -1;
5718         }
5719         libc_close(scm_rights_pipe_fd);
5720         payload = &info.payload;
5721
5722         if (info.magic != swrap_unix_scm_right_magic) {
5723                 SWRAP_LOG(SWRAP_LOG_ERROR,
5724                           "info.magic=0x%llx != swrap_unix_scm_right_magic=0x%llx",
5725                           (unsigned long long)info.magic,
5726                           (unsigned long long)swrap_unix_scm_right_magic);
5727                 swrap_close_fd_array(num_fds_out, fds_out);
5728                 errno = EINVAL;
5729                 return -1;
5730         }
5731
5732         cmp = memcmp(info.package_name,
5733                      SOCKET_WRAPPER_PACKAGE,
5734                      sizeof(info.package_name));
5735         if (cmp != 0) {
5736                 SWRAP_LOG(SWRAP_LOG_ERROR,
5737                           "info.package_name='%.*s' != '%s'",
5738                           (int)sizeof(info.package_name),
5739                           info.package_name,
5740                           SOCKET_WRAPPER_PACKAGE);
5741                 swrap_close_fd_array(num_fds_out, fds_out);
5742                 errno = EINVAL;
5743                 return -1;
5744         }
5745
5746         cmp = memcmp(info.package_version,
5747                      SOCKET_WRAPPER_VERSION,
5748                      sizeof(info.package_version));
5749         if (cmp != 0) {
5750                 SWRAP_LOG(SWRAP_LOG_ERROR,
5751                           "info.package_version='%.*s' != '%s'",
5752                           (int)sizeof(info.package_version),
5753                           info.package_version,
5754                           SOCKET_WRAPPER_VERSION);
5755                 swrap_close_fd_array(num_fds_out, fds_out);
5756                 errno = EINVAL;
5757                 return -1;
5758         }
5759
5760         if (info.full_size != sizeof(info)) {
5761                 SWRAP_LOG(SWRAP_LOG_ERROR,
5762                           "info.full_size=%zu != sizeof(info)=%zu",
5763                           (size_t)info.full_size,
5764                           sizeof(info));
5765                 swrap_close_fd_array(num_fds_out, fds_out);
5766                 errno = EINVAL;
5767                 return -1;
5768         }
5769
5770         if (info.payload_size != sizeof(info.payload)) {
5771                 SWRAP_LOG(SWRAP_LOG_ERROR,
5772                           "info.payload_size=%zu != sizeof(info.payload)=%zu",
5773                           (size_t)info.payload_size,
5774                           sizeof(info.payload));
5775                 swrap_close_fd_array(num_fds_out, fds_out);
5776                 errno = EINVAL;
5777                 return -1;
5778         }
5779
5780         if (payload->num_idxs != num_fds_out) {
5781                 SWRAP_LOG(SWRAP_LOG_ERROR,
5782                           "info.num_idxs=%u != num_fds_out=%zu",
5783                           payload->num_idxs, num_fds_out);
5784                 swrap_close_fd_array(num_fds_out, fds_out);
5785                 errno = EINVAL;
5786                 return -1;
5787         }
5788
5789         for (i = 0; i < num_fds_out; i++) {
5790                 size_t j;
5791
5792                 si_idx_array[i] = -1;
5793
5794                 if (payload->idxs[i] == -1) {
5795                         SWRAP_LOG(SWRAP_LOG_TRACE,
5796                                   "fds_out[%zu]=%d not an inet socket",
5797                                   i, fds_out[i]);
5798                         continue;
5799                 }
5800
5801                 if (payload->idxs[i] < 0) {
5802                         SWRAP_LOG(SWRAP_LOG_ERROR,
5803                                   "fds_out[%zu]=%d info.idxs[%zu]=%d < 0!",
5804                                   i, fds_out[i], i, payload->idxs[i]);
5805                         swrap_close_fd_array(num_fds_out, fds_out);
5806                         errno = EINVAL;
5807                         return -1;
5808                 }
5809
5810                 if (payload->idxs[i] >= payload->num_idxs) {
5811                         SWRAP_LOG(SWRAP_LOG_ERROR,
5812                                   "fds_out[%zu]=%d info.idxs[%zu]=%d >= %u!",
5813                                   i, fds_out[i], i, payload->idxs[i],
5814                                   payload->num_idxs);
5815                         swrap_close_fd_array(num_fds_out, fds_out);
5816                         errno = EINVAL;
5817                         return -1;
5818                 }
5819
5820                 if ((size_t)fds_out[i] >= socket_fds_max) {
5821                         SWRAP_LOG(SWRAP_LOG_ERROR,
5822                                   "The max socket index limit of %zu has been reached, "
5823                                   "trying to add %d",
5824                                   socket_fds_max,
5825                                   fds_out[i]);
5826                         swrap_close_fd_array(num_fds_out, fds_out);
5827                         errno = EMFILE;
5828                         return -1;
5829                 }
5830
5831                 SWRAP_LOG(SWRAP_LOG_TRACE,
5832                           "fds_in[%zu]=%d "
5833                           "received as info.idxs[%zu]=%d!",
5834                           i, fds_out[i],
5835                           i, payload->idxs[i]);
5836
5837                 for (j = 0; j < i; j++) {
5838                         if (payload->idxs[j] == -1) {
5839                                 continue;
5840                         }
5841                         if (payload->idxs[j] == payload->idxs[i]) {
5842                                 si_idx_array[i] = si_idx_array[j];
5843                         }
5844                 }
5845                 if (si_idx_array[i] == -1) {
5846                         const struct socket_info *si = &payload->infos[payload->idxs[i]];
5847
5848                         si_idx_array[i] = swrap_add_socket_info(si);
5849                         if (si_idx_array[i] == -1) {
5850                                 int saved_errno = errno;
5851                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5852                                           "The max socket index limit of %zu has been reached, "
5853                                           "trying to add %d",
5854                                           socket_fds_max,
5855                                           fds_out[i]);
5856                                 swrap_undo_si_idx_array(i, si_idx_array);
5857                                 swrap_close_fd_array(num_fds_out, fds_out);
5858                                 errno = saved_errno;
5859                                 return -1;
5860                         }
5861                         SWRAP_LOG(SWRAP_LOG_TRACE,
5862                                   "Imported %s socket for protocol %s, fd=%d",
5863                                   si->family == AF_INET ? "IPv4" : "IPv6",
5864                                   si->type == SOCK_DGRAM ? "UDP" : "TCP",
5865                                   fds_out[i]);
5866                 }
5867         }
5868
5869         for (i = 0; i < num_fds_out; i++) {
5870                 if (si_idx_array[i] == -1) {
5871                         continue;
5872                 }
5873                 set_socket_info_index(fds_out[i], si_idx_array[i]);
5874         }
5875
5876         /* we're done ... */
5877         *cm_data_space = new_cm_data_space;
5878
5879         return 0;
5880 }
5881
/*
 * Route one SOL_SOCKET control message of a received unix-socket message.
 *
 * SCM_RIGHTS messages are handed to swrap_recvmsg_unix_scm_rights(); every
 * other type is passed to swrap_sendmsg_copy_cmsg(), which is used for the
 * receive direction as well.
 *
 * Returns the result of the selected handler.
 */
static int swrap_recvmsg_unix_sol_socket(struct cmsghdr *cmsg,
                                         uint8_t **cm_data,
                                         size_t *cm_data_space)
{
        if (cmsg->cmsg_type == SCM_RIGHTS) {
                return swrap_recvmsg_unix_scm_rights(cmsg,
                                                     cm_data,
                                                     cm_data_space);
        }

        /* Anything that is not descriptor passing is forwarded as is. */
        return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5903
5904 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
5905
/*
 * Prepare the msghdr that is really passed to sendmsg() on a unix socket.
 *
 * *msg_tmp starts as a copy of *_msg_in and *scm_rights_pipe_fd as -1.
 * If the message carries control data, every control message is run through
 * swrap_sendmsg_unix_sol_socket() (level SOL_SOCKET) or
 * swrap_sendmsg_copy_cmsg() (any other level), which build up a new heap
 * buffer. On success msg_tmp->msg_control/msg_controllen refer to that
 * buffer; swrap_sendmsg_after_unix() releases it and closes the pipe
 * descriptor reported via scm_rights_pipe_fd.
 *
 * Returns 0 on success. On failure the negative result of the failing
 * handler is returned with errno preserved, the new buffer is released and
 * a pipe descriptor created by an earlier control message is closed, so
 * the caller has nothing to clean up (*scm_rights_pipe_fd is -1 again).
 *
 * Without HAVE_STRUCT_MSGHDR_MSG_CONTROL only the copy into *msg_tmp is done.
 */
static int swrap_sendmsg_before_unix(const struct msghdr *_msg_in,
                                     struct msghdr *msg_tmp,
                                     int *scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        struct msghdr *msg_in = discard_const_p(struct msghdr, _msg_in);
        struct cmsghdr *cmsg = NULL;
        uint8_t *cm_data = NULL;
        size_t cm_data_space = 0;
        int rc = -1;

        *msg_tmp = *msg_in;
        *scm_rights_pipe_fd = -1;

        /* Nothing to do */
        if (msg_in->msg_controllen == 0 || msg_in->msg_control == NULL) {
                return 0;
        }

        for (cmsg = CMSG_FIRSTHDR(msg_in);
             cmsg != NULL;
             cmsg = CMSG_NXTHDR(msg_in, cmsg)) {
                switch (cmsg->cmsg_level) {
                case SOL_SOCKET:
                        rc = swrap_sendmsg_unix_sol_socket(cmsg,
                                                           &cm_data,
                                                           &cm_data_space,
                                                           scm_rights_pipe_fd);
                        break;

                default:
                        rc = swrap_sendmsg_copy_cmsg(cmsg,
                                                     &cm_data,
                                                     &cm_data_space);
                        break;
                }
                if (rc < 0) {
                        int saved_errno = errno;
                        SAFE_FREE(cm_data);
                        /*
                         * An earlier control message may already have
                         * created the SCM_RIGHTS pipe. msg_tmp still
                         * refers to the caller's control buffer here, so
                         * swrap_sendmsg_after_unix() can't be used for the
                         * cleanup, close the descriptor right away instead.
                         */
                        if (*scm_rights_pipe_fd != -1) {
                                libc_close(*scm_rights_pipe_fd);
                                *scm_rights_pipe_fd = -1;
                        }
                        errno = saved_errno;
                        return rc;
                }
        }

        msg_tmp->msg_controllen = cm_data_space;
        msg_tmp->msg_control = cm_data;

        return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        *msg_tmp = *_msg_in;
        return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
5959
/*
 * Clean up after sendmsg() on a unix socket.
 *
 * With HAVE_STRUCT_MSGHDR_MSG_CONTROL the control buffer referenced by
 * msg_tmp->msg_control is released with SAFE_FREE() and the pipe descriptor
 * is closed unless it is -1. errno is left exactly as it was on entry.
 *
 * Returns ret unchanged.
 */
static ssize_t swrap_sendmsg_after_unix(struct msghdr *msg_tmp,
                                        ssize_t ret,
                                        int scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        const int errno_on_entry = errno;
        const int have_pipe = (scm_rights_pipe_fd != -1);

        SAFE_FREE(msg_tmp->msg_control);

        if (have_pipe) {
                libc_close(scm_rights_pipe_fd);
        }

        /* The cleanup must not hide the result of sendmsg() */
        errno = errno_on_entry;
#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        return ret;
}
5974
/*
 * Prepare the msghdr that is really passed to recvmsg() on a unix socket.
 *
 * *msg_tmp becomes a copy of *msg_in and *tmp_control is reset to NULL.
 * If the caller supplied a control buffer, a zeroed temporary buffer is
 * allocated which is (unless the size would get close to INT32_MAX)
 * CMSG_SPACE(sizeof(int)) bytes larger, and msg_tmp is pointed at it.
 * The same buffer is returned in *tmp_control.
 *
 * Returns 0 on success and -1 if the allocation failed.
 */
static int swrap_recvmsg_before_unix(struct msghdr *msg_in,
                                     struct msghdr *msg_tmp,
                                     uint8_t **tmp_control)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        const size_t pipe_fd_room = CMSG_SPACE(sizeof(int));
        size_t ctrl_len = 0;
        uint8_t *ctrl_buf = NULL;

        *msg_tmp = *msg_in;
        *tmp_control = NULL;

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_in->msg_controllen=%zu",
                  (size_t)msg_in->msg_controllen);

        /* Without a control buffer the plain copy is all that is needed */
        if (msg_in->msg_control == NULL || msg_in->msg_controllen == 0) {
                return 0;
        }

        /*
         * The kernel needs some extra room to deliver the pipe fd
         * appended by swrap_sendmsg_before_unix();
         * swrap_recvmsg_after_unix() hides that fd again.
         */
        ctrl_len = msg_in->msg_controllen;
        if (ctrl_len < (INT32_MAX - pipe_fd_room)) {
                ctrl_len += pipe_fd_room;
        }

        ctrl_buf = calloc(1, ctrl_len);
        if (ctrl_buf == NULL) {
                return -1;
        }

        *tmp_control = ctrl_buf;
        msg_tmp->msg_control = ctrl_buf;
        msg_tmp->msg_controllen = ctrl_len;

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_tmp->msg_controllen=%zu",
                  (size_t)msg_tmp->msg_controllen);
        return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        *msg_tmp = *msg_in;
        *tmp_control = NULL;
        return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6024
/*
 * Post-process the result of recvmsg() on a unix socket.
 *
 * msg_tmp is the msghdr used for the real recvmsg() call, *tmp_control the
 * temporary control buffer created by swrap_recvmsg_before_unix() (or NULL),
 * msg_out the msghdr of the caller and ret the result of recvmsg().
 *
 *  - ret < 0: msg_out is not touched, ret is returned.
 *  - no control data was received: msg_tmp is copied to msg_out, but
 *    msg_out->msg_control keeps pointing to the buffer of the caller.
 *  - otherwise every control message is run through
 *    swrap_recvmsg_unix_sol_socket() (level SOL_SOCKET) or
 *    swrap_sendmsg_copy_cmsg() (any other level) and the rebuilt control
 *    data is copied into the buffer of the caller, truncated to the
 *    size the caller provided.
 *
 * *tmp_control is released on all paths and errno is preserved.
 *
 * Returns ret, or the negative result of a failing control message handler.
 */
static ssize_t swrap_recvmsg_after_unix(struct msghdr *msg_tmp,
                                        uint8_t **tmp_control,
                                        struct msghdr *msg_out,
                                        ssize_t ret)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        struct cmsghdr *cmsg = NULL;
        uint8_t *cm_data = NULL;
        size_t cm_data_space = 0;
        int rc = -1;

        if (ret < 0) {
                int saved_errno = errno;
                SWRAP_LOG(SWRAP_LOG_TRACE, "ret=%zd - %d - %s", ret,
                          saved_errno, strerror(saved_errno));
                SAFE_FREE(*tmp_control);
                /* msg_out should not be touched on error */
                errno = saved_errno;
                return ret;
        }

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_tmp->msg_controllen=%zu",
                  (size_t)msg_tmp->msg_controllen);

        /* Nothing to do */
        if (msg_tmp->msg_controllen == 0 || msg_tmp->msg_control == NULL) {
                int saved_errno = errno;
                /*
                 * If swrap_recvmsg_before_unix() installed a temporary
                 * buffer, msg_tmp->msg_control refers to memory which is
                 * released below. Give the caller its own pointer back,
                 * otherwise msg_out->msg_control would be left dangling.
                 */
                if (*tmp_control != NULL) {
                        msg_tmp->msg_control = msg_out->msg_control;
                }
                *msg_out = *msg_tmp;
                SAFE_FREE(*tmp_control);
                errno = saved_errno;
                return ret;
        }

        for (cmsg = CMSG_FIRSTHDR(msg_tmp);
             cmsg != NULL;
             cmsg = CMSG_NXTHDR(msg_tmp, cmsg)) {
                switch (cmsg->cmsg_level) {
                case SOL_SOCKET:
                        rc = swrap_recvmsg_unix_sol_socket(cmsg,
                                                           &cm_data,
                                                           &cm_data_space);
                        break;

                default:
                        rc = swrap_sendmsg_copy_cmsg(cmsg,
                                                     &cm_data,
                                                     &cm_data_space);
                        break;
                }
                if (rc < 0) {
                        int saved_errno = errno;
                        SAFE_FREE(cm_data);
                        SAFE_FREE(*tmp_control);
                        errno = saved_errno;
                        return rc;
                }
        }

        /*
         * msg_tmp->msg_control (*tmp_control) was created by
         * swrap_recvmsg_before_unix() and msg_out->msg_control
         * is still the buffer of the caller.
         */
        msg_tmp->msg_control = msg_out->msg_control;
        msg_tmp->msg_controllen = msg_out->msg_controllen;
        *msg_out = *msg_tmp;

        cm_data_space = MIN(cm_data_space, msg_out->msg_controllen);
        if (cm_data_space > 0) {
                /* cm_data is NULL if no control message was processed */
                memcpy(msg_out->msg_control, cm_data, cm_data_space);
        }
        msg_out->msg_controllen = cm_data_space;
        SAFE_FREE(cm_data);
        SAFE_FREE(*tmp_control);

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_out->msg_controllen=%zu",
                  (size_t)msg_out->msg_controllen);
        return ret;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        int saved_errno = errno;
        *msg_out = *msg_tmp;
        SAFE_FREE(*tmp_control);
        errno = saved_errno;
        return ret;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6111
6112 static ssize_t swrap_sendmsg_before(int fd,
6113                                     struct socket_info *si,
6114                                     struct msghdr *msg,
6115                                     struct iovec *tmp_iov,
6116                                     struct sockaddr_un *tmp_un,
6117                                     const struct sockaddr_un **to_un,
6118                                     const struct sockaddr **to,
6119                                     int *bcast)
6120 {
6121         size_t i, len = 0;
6122         ssize_t ret = -1;
6123
6124         if (to_un) {
6125                 *to_un = NULL;
6126         }
6127         if (to) {
6128                 *to = NULL;
6129         }
6130         if (bcast) {
6131                 *bcast = 0;
6132         }
6133
6134         SWRAP_LOCK_SI(si);
6135
6136         switch (si->type) {
6137         case SOCK_STREAM: {
6138                 unsigned long mtu;
6139
6140                 if (!si->connected) {
6141                         errno = ENOTCONN;
6142                         goto out;
6143                 }
6144
6145                 if (msg->msg_iovlen == 0) {
6146                         break;
6147                 }
6148
6149                 mtu = socket_wrapper_mtu();
6150                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6151                         size_t nlen;
6152                         nlen = len + msg->msg_iov[i].iov_len;
6153                         if (nlen < len) {
6154                                 /* overflow */
6155                                 errno = EMSGSIZE;
6156                                 goto out;
6157                         }
6158                         if (nlen > mtu) {
6159                                 break;
6160                         }
6161                 }
6162                 msg->msg_iovlen = i;
6163                 if (msg->msg_iovlen == 0) {
6164                         *tmp_iov = msg->msg_iov[0];
6165                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6166                                                (size_t)mtu);
6167                         msg->msg_iov = tmp_iov;
6168                         msg->msg_iovlen = 1;
6169                 }
6170                 break;
6171         }
6172         case SOCK_DGRAM:
6173                 if (si->connected) {
6174                         if (msg->msg_name != NULL) {
6175                                 /*
6176                                  * We are dealing with unix sockets and if we
6177                                  * are connected, we should only talk to the
6178                                  * connected unix path. Using the fd to send
6179                                  * to another server would be hard to achieve.
6180                                  */
6181                                 msg->msg_name = NULL;
6182                                 msg->msg_namelen = 0;
6183                         }
6184                 } else {
6185                         const struct sockaddr *msg_name;
6186                         msg_name = (const struct sockaddr *)msg->msg_name;
6187
6188                         if (msg_name == NULL) {
6189                                 errno = ENOTCONN;
6190                                 goto out;
6191                         }
6192
6193
6194                         ret = sockaddr_convert_to_un(si, msg_name, msg->msg_namelen,
6195                                                      tmp_un, 0, bcast);
6196                         if (ret == -1) {
6197                                 goto out;
6198                         }
6199
6200                         if (to_un) {
6201                                 *to_un = tmp_un;
6202                         }
6203                         if (to) {
6204                                 *to = msg_name;
6205                         }
6206                         msg->msg_name = tmp_un;
6207                         msg->msg_namelen = sizeof(*tmp_un);
6208                 }
6209
6210                 if (si->bound == 0) {
6211                         ret = swrap_auto_bind(fd, si, si->family);
6212                         if (ret == -1) {
6213                                 SWRAP_UNLOCK_SI(si);
6214                                 if (errno == ENOTSOCK) {
6215                                         swrap_remove_stale(fd);
6216                                         ret = -ENOTSOCK;
6217                                 } else {
6218                                         SWRAP_LOG(SWRAP_LOG_ERROR, "swrap_sendmsg_before failed");
6219                                 }
6220                                 return ret;
6221                         }
6222                 }
6223
6224                 if (!si->defer_connect) {
6225                         break;
6226                 }
6227
6228                 ret = sockaddr_convert_to_un(si,
6229                                              &si->peername.sa.s,
6230                                              si->peername.sa_socklen,
6231                                              tmp_un,
6232                                              0,
6233                                              NULL);
6234                 if (ret == -1) {
6235                         goto out;
6236                 }
6237
6238                 ret = libc_connect(fd,
6239                                    (struct sockaddr *)(void *)tmp_un,
6240                                    sizeof(*tmp_un));
6241
6242                 /* to give better errors */
6243                 if (ret == -1 && errno == ENOENT) {
6244                         errno = EHOSTUNREACH;
6245                 }
6246
6247                 if (ret == -1) {
6248                         goto out;
6249                 }
6250
6251                 si->defer_connect = 0;
6252                 break;
6253         default:
6254                 errno = EHOSTUNREACH;
6255                 goto out;
6256         }
6257
6258         ret = 0;
6259 out:
6260         SWRAP_UNLOCK_SI(si);
6261
6262         return ret;
6263 }
6264
6265 static void swrap_sendmsg_after(int fd,
6266                                 struct socket_info *si,
6267                                 struct msghdr *msg,
6268                                 const struct sockaddr *to,
6269                                 ssize_t ret)
6270 {
6271         int saved_errno = errno;
6272         size_t i, len = 0;
6273         uint8_t *buf;
6274         off_t ofs = 0;
6275         size_t avail = 0;
6276         size_t remain;
6277
6278         /* to give better errors */
6279         if (ret == -1) {
6280                 if (saved_errno == ENOENT) {
6281                         saved_errno = EHOSTUNREACH;
6282                 } else if (saved_errno == ENOTSOCK) {
6283                         /* If the fd is not a socket, remove it */
6284                         swrap_remove_stale(fd);
6285                 }
6286         }
6287
6288         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6289                 avail += msg->msg_iov[i].iov_len;
6290         }
6291
6292         if (ret == -1) {
6293                 remain = MIN(80, avail);
6294         } else {
6295                 remain = ret;
6296         }
6297
6298         /* we capture it as one single packet */
6299         buf = (uint8_t *)malloc(remain);
6300         if (!buf) {
6301                 /* we just not capture the packet */
6302                 errno = saved_errno;
6303                 return;
6304         }
6305
6306         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6307                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6308                 if (this_time > 0) {
6309                         memcpy(buf + ofs,
6310                                msg->msg_iov[i].iov_base,
6311                                this_time);
6312                 }
6313                 ofs += this_time;
6314                 remain -= this_time;
6315         }
6316         len = ofs;
6317
6318         SWRAP_LOCK_SI(si);
6319
6320         switch (si->type) {
6321         case SOCK_STREAM:
6322                 if (ret == -1) {
6323                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6324                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND_RST, NULL, 0);
6325                 } else {
6326                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6327                 }
6328                 break;
6329
6330         case SOCK_DGRAM:
6331                 if (si->connected) {
6332                         to = &si->peername.sa.s;
6333                 }
6334                 if (ret == -1) {
6335                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6336                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO_UNREACH, buf, len);
6337                 } else {
6338                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6339                 }
6340                 break;
6341         }
6342
6343         SWRAP_UNLOCK_SI(si);
6344
6345         free(buf);
6346         errno = saved_errno;
6347 }
6348
6349 static int swrap_recvmsg_before(int fd,
6350                                 struct socket_info *si,
6351                                 struct msghdr *msg,
6352                                 struct iovec *tmp_iov)
6353 {
6354         size_t i, len = 0;
6355         int ret = -1;
6356
6357         SWRAP_LOCK_SI(si);
6358
6359         (void)fd; /* unused */
6360
6361         switch (si->type) {
6362         case SOCK_STREAM: {
6363                 unsigned int mtu;
6364                 if (!si->connected) {
6365                         errno = ENOTCONN;
6366                         goto out;
6367                 }
6368
6369                 if (msg->msg_iovlen == 0) {
6370                         break;
6371                 }
6372
6373                 mtu = socket_wrapper_mtu();
6374                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6375                         size_t nlen;
6376                         nlen = len + msg->msg_iov[i].iov_len;
6377                         if (nlen > mtu) {
6378                                 break;
6379                         }
6380                 }
6381                 msg->msg_iovlen = i;
6382                 if (msg->msg_iovlen == 0) {
6383                         *tmp_iov = msg->msg_iov[0];
6384                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6385                                                (size_t)mtu);
6386                         msg->msg_iov = tmp_iov;
6387                         msg->msg_iovlen = 1;
6388                 }
6389                 break;
6390         }
6391         case SOCK_DGRAM:
6392                 if (msg->msg_name == NULL) {
6393                         errno = EINVAL;
6394                         goto out;
6395                 }
6396
6397                 if (msg->msg_iovlen == 0) {
6398                         break;
6399                 }
6400
6401                 if (si->bound == 0) {
6402                         ret = swrap_auto_bind(fd, si, si->family);
6403                         if (ret == -1) {
6404                                 SWRAP_UNLOCK_SI(si);
6405                                 /*
6406                                  * When attempting to read or write to a
6407                                  * descriptor, if an underlying autobind fails
6408                                  * because it's not a socket, stop intercepting
6409                                  * uses of that descriptor.
6410                                  */
6411                                 if (errno == ENOTSOCK) {
6412                                         swrap_remove_stale(fd);
6413                                         ret = -ENOTSOCK;
6414                                 } else {
6415                                         SWRAP_LOG(SWRAP_LOG_ERROR,
6416                                                   "swrap_recvmsg_before failed");
6417                                 }
6418                                 return ret;
6419                         }
6420                 }
6421                 break;
6422         default:
6423                 errno = EHOSTUNREACH;
6424                 goto out;
6425         }
6426
6427         ret = 0;
6428 out:
6429         SWRAP_UNLOCK_SI(si);
6430
6431         return ret;
6432 }
6433
6434 static int swrap_recvmsg_after(int fd,
6435                                struct socket_info *si,
6436                                struct msghdr *msg,
6437                                const struct sockaddr_un *un_addr,
6438                                socklen_t un_addrlen,
6439                                ssize_t ret)
6440 {
6441         int saved_errno = errno;
6442         size_t i;
6443         uint8_t *buf = NULL;
6444         off_t ofs = 0;
6445         size_t avail = 0;
6446         size_t remain;
6447         int rc;
6448
6449         /* to give better errors */
6450         if (ret == -1) {
6451                 if (saved_errno == ENOENT) {
6452                         saved_errno = EHOSTUNREACH;
6453                 } else if (saved_errno == ENOTSOCK) {
6454                         /* If the fd is not a socket, remove it */
6455                         swrap_remove_stale(fd);
6456                 }
6457         }
6458
6459         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6460                 avail += msg->msg_iov[i].iov_len;
6461         }
6462
6463         SWRAP_LOCK_SI(si);
6464
6465         /* Convert the socket address before we leave */
6466         if (si->type == SOCK_DGRAM && un_addr != NULL) {
6467                 rc = sockaddr_convert_from_un(si,
6468                                               un_addr,
6469                                               un_addrlen,
6470                                               si->family,
6471                                               msg->msg_name,
6472                                               &msg->msg_namelen);
6473                 if (rc == -1) {
6474                         goto done;
6475                 }
6476         }
6477
6478         if (avail == 0) {
6479                 rc = 0;
6480                 goto done;
6481         }
6482
6483         if (ret == -1) {
6484                 remain = MIN(80, avail);
6485         } else {
6486                 remain = ret;
6487         }
6488
6489         /* we capture it as one single packet */
6490         buf = (uint8_t *)malloc(remain);
6491         if (buf == NULL) {
6492                 /* we just not capture the packet */
6493                 SWRAP_UNLOCK_SI(si);
6494                 errno = saved_errno;
6495                 return -1;
6496         }
6497
6498         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6499                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6500                 memcpy(buf + ofs,
6501                        msg->msg_iov[i].iov_base,
6502                        this_time);
6503                 ofs += this_time;
6504                 remain -= this_time;
6505         }
6506
6507         switch (si->type) {
6508         case SOCK_STREAM:
6509                 if (ret == -1 && saved_errno != EAGAIN && saved_errno != ENOBUFS) {
6510                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6511                 } else if (ret == 0) { /* END OF FILE */
6512                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6513                 } else if (ret > 0) {
6514                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV, buf, ret);
6515                 }
6516                 break;
6517
6518         case SOCK_DGRAM:
6519                 if (ret == -1) {
6520                         break;
6521                 }
6522
6523                 if (un_addr != NULL) {
6524                         swrap_pcap_dump_packet(si,
6525                                           msg->msg_name,
6526                                           SWRAP_RECVFROM,
6527                                           buf,
6528                                           ret);
6529                 } else {
6530                         swrap_pcap_dump_packet(si,
6531                                           msg->msg_name,
6532                                           SWRAP_RECV,
6533                                           buf,
6534                                           ret);
6535                 }
6536
6537                 break;
6538         }
6539
6540         rc = 0;
6541 done:
6542         free(buf);
6543         errno = saved_errno;
6544
6545 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6546         if (rc == 0 &&
6547             msg->msg_controllen > 0 &&
6548             msg->msg_control != NULL) {
6549                 rc = swrap_msghdr_add_socket_info(si, msg);
6550                 if (rc < 0) {
6551                         SWRAP_UNLOCK_SI(si);
6552                         return -1;
6553                 }
6554         }
6555 #endif
6556
6557         SWRAP_UNLOCK_SI(si);
6558         return rc;
6559 }
6560
6561 /****************************************************************************
6562  *   RECVFROM
6563  ***************************************************************************/
6564
6565 static ssize_t swrap_recvfrom(int s, void *buf, size_t len, int flags,
6566                               struct sockaddr *from, socklen_t *fromlen)
6567 {
6568         struct swrap_address from_addr = {
6569                 .sa_socklen = sizeof(struct sockaddr_un),
6570         };
6571         ssize_t ret;
6572         struct socket_info *si = find_socket_info(s);
6573         struct swrap_address saddr = {
6574                 .sa_socklen = sizeof(struct sockaddr_storage),
6575         };
6576         struct msghdr msg;
6577         struct iovec tmp;
6578         int tret;
6579
6580         if (!si) {
6581                 return libc_recvfrom(s,
6582                                      buf,
6583                                      len,
6584                                      flags,
6585                                      from,
6586                                      fromlen);
6587         }
6588
6589         tmp.iov_base = buf;
6590         tmp.iov_len = len;
6591
6592         ZERO_STRUCT(msg);
6593         if (from != NULL && fromlen != NULL) {
6594                 msg.msg_name = from;   /* optional address */
6595                 msg.msg_namelen = *fromlen; /* size of address */
6596         } else {
6597                 msg.msg_name = &saddr.sa.s; /* optional address */
6598                 msg.msg_namelen = saddr.sa_socklen; /* size of address */
6599         }
6600         msg.msg_iov = &tmp;            /* scatter/gather array */
6601         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6602 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6603         msg.msg_control = NULL;        /* ancillary data, see below */
6604         msg.msg_controllen = 0;        /* ancillary data buffer len */
6605         msg.msg_flags = 0;             /* flags on received message */
6606 #endif
6607
6608         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6609         if (tret < 0) {
6610                 return -1;
6611         }
6612
6613         buf = msg.msg_iov[0].iov_base;
6614         len = msg.msg_iov[0].iov_len;
6615
6616         ret = libc_recvfrom(s,
6617                             buf,
6618                             len,
6619                             flags,
6620                             &from_addr.sa.s,
6621                             &from_addr.sa_socklen);
6622         if (ret == -1) {
6623                 return ret;
6624         }
6625
6626         tret = swrap_recvmsg_after(s,
6627                                    si,
6628                                    &msg,
6629                                    &from_addr.sa.un,
6630                                    from_addr.sa_socklen,
6631                                    ret);
6632         if (tret != 0) {
6633                 return tret;
6634         }
6635
6636         if (from != NULL && fromlen != NULL) {
6637                 *fromlen = msg.msg_namelen;
6638         }
6639
6640         return ret;
6641 }
6642
/*
 * Exported recvfrom() symbol; the prototype of the length argument depends
 * on HAVE_ACCEPT_PSOCKLEN_T. All work is done by swrap_recvfrom().
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
                 struct sockaddr *from, Psocklen_t fromlen)
#else
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
                 struct sockaddr *from, socklen_t *fromlen)
#endif
{
        socklen_t *addrlen = (socklen_t *)fromlen;

        return swrap_recvfrom(s, buf, len, flags, from, addrlen);
}
6653
6654 /****************************************************************************
6655  *   SENDTO
6656  ***************************************************************************/
6657
6658 static ssize_t swrap_sendto(int s, const void *buf, size_t len, int flags,
6659                             const struct sockaddr *to, socklen_t tolen)
6660 {
6661         struct msghdr msg;
6662         struct iovec tmp;
6663         struct swrap_address un_addr = {
6664                 .sa_socklen = sizeof(struct sockaddr_un),
6665         };
6666         const struct sockaddr_un *to_un = NULL;
6667         ssize_t ret;
6668         int rc;
6669         struct socket_info *si = find_socket_info(s);
6670         int bcast = 0;
6671
6672         if (!si) {
6673                 return libc_sendto(s, buf, len, flags, to, tolen);
6674         }
6675
6676         tmp.iov_base = discard_const_p(char, buf);
6677         tmp.iov_len = len;
6678
6679         ZERO_STRUCT(msg);
6680         msg.msg_name = discard_const_p(struct sockaddr, to); /* optional address */
6681         msg.msg_namelen = tolen;       /* size of address */
6682         msg.msg_iov = &tmp;            /* scatter/gather array */
6683         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6684 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6685         msg.msg_control = NULL;        /* ancillary data, see below */
6686         msg.msg_controllen = 0;        /* ancillary data buffer len */
6687         msg.msg_flags = 0;             /* flags on received message */
6688 #endif
6689
6690         rc = swrap_sendmsg_before(s,
6691                                   si,
6692                                   &msg,
6693                                   &tmp,
6694                                   &un_addr.sa.un,
6695                                   &to_un,
6696                                   &to,
6697                                   &bcast);
6698         if (rc < 0) {
6699                 return -1;
6700         }
6701
6702         buf = msg.msg_iov[0].iov_base;
6703         len = msg.msg_iov[0].iov_len;
6704
6705         if (bcast) {
6706                 struct stat st;
6707                 unsigned int iface;
6708                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
6709                 char type;
6710                 char *swrap_dir = NULL;
6711
6712                 type = SOCKET_TYPE_CHAR_UDP;
6713
6714                 swrap_dir = socket_wrapper_dir();
6715                 if (swrap_dir == NULL) {
6716                         return -1;
6717                 }
6718
6719                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
6720                         swrap_un_path(&un_addr.sa.un,
6721                                       swrap_dir,
6722                                       type,
6723                                       iface,
6724                                       prt);
6725                         if (stat(un_addr.sa.un.sun_path, &st) != 0) continue;
6726
6727                         /* ignore the any errors in broadcast sends */
6728                         libc_sendto(s,
6729                                     buf,
6730                                     len,
6731                                     flags,
6732                                     &un_addr.sa.s,
6733                                     un_addr.sa_socklen);
6734                 }
6735
6736                 SAFE_FREE(swrap_dir);
6737
6738                 SWRAP_LOCK_SI(si);
6739
6740                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6741
6742                 SWRAP_UNLOCK_SI(si);
6743
6744                 return len;
6745         }
6746
6747         SWRAP_LOCK_SI(si);
6748         /*
6749          * If it is a dgram socket and we are connected, don't include the
6750          * 'to' address.
6751          */
6752         if (si->type == SOCK_DGRAM && si->connected) {
6753                 ret = libc_sendto(s,
6754                                   buf,
6755                                   len,
6756                                   flags,
6757                                   NULL,
6758                                   0);
6759         } else {
6760                 ret = libc_sendto(s,
6761                                   buf,
6762                                   len,
6763                                   flags,
6764                                   (struct sockaddr *)msg.msg_name,
6765                                   msg.msg_namelen);
6766         }
6767
6768         SWRAP_UNLOCK_SI(si);
6769
6770         swrap_sendmsg_after(s, si, &msg, to, ret);
6771
6772         return ret;
6773 }
6774
/* Exported sendto() symbol; all work is done by swrap_sendto(). */
ssize_t sendto(int s, const void *buf, size_t len, int flags,
               const struct sockaddr *to, socklen_t tolen)
{
        ssize_t nsent = swrap_sendto(s, buf, len, flags, to, tolen);

        return nsent;
}
6780
6781 /****************************************************************************
 *   RECV
6783  ***************************************************************************/
6784
6785 static ssize_t swrap_recv(int s, void *buf, size_t len, int flags)
6786 {
6787         struct socket_info *si;
6788         struct msghdr msg;
6789         struct swrap_address saddr = {
6790                 .sa_socklen = sizeof(struct sockaddr_storage),
6791         };
6792         struct iovec tmp;
6793         ssize_t ret;
6794         int tret;
6795
6796         si = find_socket_info(s);
6797         if (si == NULL) {
6798                 return libc_recv(s, buf, len, flags);
6799         }
6800
6801         tmp.iov_base = buf;
6802         tmp.iov_len = len;
6803
6804         ZERO_STRUCT(msg);
6805         msg.msg_name = &saddr.sa.s;    /* optional address */
6806         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6807         msg.msg_iov = &tmp;            /* scatter/gather array */
6808         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6809 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6810         msg.msg_control = NULL;        /* ancillary data, see below */
6811         msg.msg_controllen = 0;        /* ancillary data buffer len */
6812         msg.msg_flags = 0;             /* flags on received message */
6813 #endif
6814
6815         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6816         if (tret < 0) {
6817                 return -1;
6818         }
6819
6820         buf = msg.msg_iov[0].iov_base;
6821         len = msg.msg_iov[0].iov_len;
6822
6823         ret = libc_recv(s, buf, len, flags);
6824
6825         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6826         if (tret != 0) {
6827                 return tret;
6828         }
6829
6830         return ret;
6831 }
6832
/* Exported recv() symbol; all work is done by swrap_recv(). */
ssize_t recv(int s, void *buf, size_t len, int flags)
{
        ssize_t nread = swrap_recv(s, buf, len, flags);

        return nread;
}
6837
6838 /****************************************************************************
6839  *   READ
6840  ***************************************************************************/
6841
6842 static ssize_t swrap_read(int s, void *buf, size_t len)
6843 {
6844         struct socket_info *si;
6845         struct msghdr msg;
6846         struct iovec tmp;
6847         struct swrap_address saddr = {
6848                 .sa_socklen = sizeof(struct sockaddr_storage),
6849         };
6850         ssize_t ret;
6851         int tret;
6852
6853         si = find_socket_info(s);
6854         if (si == NULL) {
6855                 return libc_read(s, buf, len);
6856         }
6857
6858         tmp.iov_base = buf;
6859         tmp.iov_len = len;
6860
6861         ZERO_STRUCT(msg);
6862         msg.msg_name = &saddr.sa.ss;   /* optional address */
6863         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6864         msg.msg_iov = &tmp;            /* scatter/gather array */
6865         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6866 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6867         msg.msg_control = NULL;        /* ancillary data, see below */
6868         msg.msg_controllen = 0;        /* ancillary data buffer len */
6869         msg.msg_flags = 0;             /* flags on received message */
6870 #endif
6871
6872         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6873         if (tret < 0) {
6874                 if (tret == -ENOTSOCK) {
6875                         return libc_read(s, buf, len);
6876                 }
6877                 return -1;
6878         }
6879
6880         buf = msg.msg_iov[0].iov_base;
6881         len = msg.msg_iov[0].iov_len;
6882
6883         ret = libc_read(s, buf, len);
6884
6885         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6886         if (tret != 0) {
6887                 return tret;
6888         }
6889
6890         return ret;
6891 }
6892
/* Exported read() symbol; all work is done by swrap_read(). */
ssize_t read(int s, void *buf, size_t len)
{
        ssize_t nread = swrap_read(s, buf, len);

        return nread;
}
6897
6898 /****************************************************************************
6899  *   WRITE
6900  ***************************************************************************/
6901
6902 static ssize_t swrap_write(int s, const void *buf, size_t len)
6903 {
6904         struct msghdr msg;
6905         struct iovec tmp;
6906         struct sockaddr_un un_addr;
6907         ssize_t ret;
6908         int rc;
6909         struct socket_info *si;
6910
6911         si = find_socket_info(s);
6912         if (si == NULL) {
6913                 return libc_write(s, buf, len);
6914         }
6915
6916         tmp.iov_base = discard_const_p(char, buf);
6917         tmp.iov_len = len;
6918
6919         ZERO_STRUCT(msg);
6920         msg.msg_name = NULL;           /* optional address */
6921         msg.msg_namelen = 0;           /* size of address */
6922         msg.msg_iov = &tmp;            /* scatter/gather array */
6923         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6924 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6925         msg.msg_control = NULL;        /* ancillary data, see below */
6926         msg.msg_controllen = 0;        /* ancillary data buffer len */
6927         msg.msg_flags = 0;             /* flags on received message */
6928 #endif
6929
6930         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
6931         if (rc < 0) {
6932                 return -1;
6933         }
6934
6935         buf = msg.msg_iov[0].iov_base;
6936         len = msg.msg_iov[0].iov_len;
6937
6938         ret = libc_write(s, buf, len);
6939
6940         swrap_sendmsg_after(s, si, &msg, NULL, ret);
6941
6942         return ret;
6943 }
6944
/* Exported write() symbol; all work is done by swrap_write(). */
ssize_t write(int s, const void *buf, size_t len)
{
        ssize_t nwritten = swrap_write(s, buf, len);

        return nwritten;
}
6949
6950 /****************************************************************************
6951  *   SEND
6952  ***************************************************************************/
6953
6954 static ssize_t swrap_send(int s, const void *buf, size_t len, int flags)
6955 {
6956         struct msghdr msg;
6957         struct iovec tmp;
6958         struct sockaddr_un un_addr;
6959         ssize_t ret;
6960         int rc;
6961         struct socket_info *si = find_socket_info(s);
6962
6963         if (!si) {
6964                 return libc_send(s, buf, len, flags);
6965         }
6966
6967         tmp.iov_base = discard_const_p(char, buf);
6968         tmp.iov_len = len;
6969
6970         ZERO_STRUCT(msg);
6971         msg.msg_name = NULL;           /* optional address */
6972         msg.msg_namelen = 0;           /* size of address */
6973         msg.msg_iov = &tmp;            /* scatter/gather array */
6974         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6975 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6976         msg.msg_control = NULL;        /* ancillary data, see below */
6977         msg.msg_controllen = 0;        /* ancillary data buffer len */
6978         msg.msg_flags = 0;             /* flags on received message */
6979 #endif
6980
6981         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
6982         if (rc < 0) {
6983                 return -1;
6984         }
6985
6986         buf = msg.msg_iov[0].iov_base;
6987         len = msg.msg_iov[0].iov_len;
6988
6989         ret = libc_send(s, buf, len, flags);
6990
6991         swrap_sendmsg_after(s, si, &msg, NULL, ret);
6992
6993         return ret;
6994 }
6995
/* Exported send() symbol; all work is done by swrap_send(). */
ssize_t send(int s, const void *buf, size_t len, int flags)
{
        ssize_t nsent = swrap_send(s, buf, len, flags);

        return nsent;
}
7000
7001 /****************************************************************************
7002  *   RECVMSG
7003  ***************************************************************************/
7004
7005 static ssize_t swrap_recvmsg(int s, struct msghdr *omsg, int flags)
7006 {
7007         struct swrap_address from_addr = {
7008                 .sa_socklen = sizeof(struct sockaddr_un),
7009         };
7010         struct swrap_address convert_addr = {
7011                 .sa_socklen = sizeof(struct sockaddr_storage),
7012         };
7013         struct socket_info *si;
7014         struct msghdr msg;
7015         struct iovec tmp;
7016 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7017         size_t msg_ctrllen_filled;
7018         size_t msg_ctrllen_left;
7019 #endif
7020
7021         ssize_t ret;
7022         int rc;
7023
7024         si = find_socket_info(s);
7025         if (si == NULL) {
7026                 uint8_t *tmp_control = NULL;
7027                 rc = swrap_recvmsg_before_unix(omsg, &msg, &tmp_control);
7028                 if (rc < 0) {
7029                         return rc;
7030                 }
7031                 ret = libc_recvmsg(s, &msg, flags);
7032                 return swrap_recvmsg_after_unix(&msg, &tmp_control, omsg, ret);
7033         }
7034
7035         tmp.iov_base = NULL;
7036         tmp.iov_len = 0;
7037
7038         ZERO_STRUCT(msg);
7039         msg.msg_name = &from_addr.sa;              /* optional address */
7040         msg.msg_namelen = from_addr.sa_socklen;    /* size of address */
7041         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7042         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7043 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7044         msg_ctrllen_filled = 0;
7045         msg_ctrllen_left = omsg->msg_controllen;
7046
7047         msg.msg_control = omsg->msg_control;       /* ancillary data, see below */
7048         msg.msg_controllen = omsg->msg_controllen; /* ancillary data buffer len */
7049         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7050 #endif
7051
7052         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7053         if (rc < 0) {
7054                 return -1;
7055         }
7056
7057         ret = libc_recvmsg(s, &msg, flags);
7058
7059 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7060         msg_ctrllen_filled += msg.msg_controllen;
7061         msg_ctrllen_left -= msg.msg_controllen;
7062
7063         if (omsg->msg_control != NULL) {
7064                 uint8_t *p;
7065
7066                 p = omsg->msg_control;
7067                 p += msg_ctrllen_filled;
7068
7069                 msg.msg_control = p;
7070                 msg.msg_controllen = msg_ctrllen_left;
7071         } else {
7072                 msg.msg_control = NULL;
7073                 msg.msg_controllen = 0;
7074         }
7075 #endif
7076
7077         /*
7078          * We convert the unix address to a IP address so we need a buffer
7079          * which can store the address in case of SOCK_DGRAM, see below.
7080          */
7081         msg.msg_name = &convert_addr.sa;
7082         msg.msg_namelen = convert_addr.sa_socklen;
7083
7084         rc = swrap_recvmsg_after(s,
7085                                  si,
7086                                  &msg,
7087                                  &from_addr.sa.un,
7088                                  from_addr.sa_socklen,
7089                                  ret);
7090         if (rc != 0) {
7091                 return rc;
7092         }
7093
7094 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7095         if (omsg->msg_control != NULL) {
7096                 /* msg.msg_controllen = space left */
7097                 msg_ctrllen_left = msg.msg_controllen;
7098                 msg_ctrllen_filled = omsg->msg_controllen - msg_ctrllen_left;
7099         }
7100
7101         /* Update the original message length */
7102         omsg->msg_controllen = msg_ctrllen_filled;
7103         omsg->msg_flags = msg.msg_flags;
7104 #endif
7105         omsg->msg_iovlen = msg.msg_iovlen;
7106
7107         SWRAP_LOCK_SI(si);
7108
7109         /*
7110          * From the manpage:
7111          *
7112          * The  msg_name  field  points  to a caller-allocated buffer that is
7113          * used to return the source address if the socket is unconnected.  The
7114          * caller should set msg_namelen to the size of this buffer before this
7115          * call; upon return from a successful call, msg_name will contain the
7116          * length of the returned address.  If the application  does  not  need
7117          * to know the source address, msg_name can be specified as NULL.
7118          */
7119         if (si->type == SOCK_STREAM) {
7120                 omsg->msg_namelen = 0;
7121         } else if (omsg->msg_name != NULL &&
7122                    omsg->msg_namelen != 0 &&
7123                    omsg->msg_namelen >= msg.msg_namelen) {
7124                 memcpy(omsg->msg_name, msg.msg_name, msg.msg_namelen);
7125                 omsg->msg_namelen = msg.msg_namelen;
7126         }
7127
7128         SWRAP_UNLOCK_SI(si);
7129
7130         return ret;
7131 }
7132
/*
 * Exported recvmsg() override: hands the call to swrap_recvmsg() and
 * returns its result unchanged.
 */
ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)
{
	ssize_t nread = swrap_recvmsg(sockfd, msg, flags);

	return nread;
}
7137
7138 /****************************************************************************
7139  *   SENDMSG
7140  ***************************************************************************/
7141
7142 static ssize_t swrap_sendmsg(int s, const struct msghdr *omsg, int flags)
7143 {
7144         struct msghdr msg;
7145         struct iovec tmp;
7146         struct sockaddr_un un_addr;
7147         const struct sockaddr_un *to_un = NULL;
7148         const struct sockaddr *to = NULL;
7149         ssize_t ret;
7150         int rc;
7151         struct socket_info *si = find_socket_info(s);
7152         int bcast = 0;
7153
7154         if (!si) {
7155                 int scm_rights_pipe_fd = -1;
7156
7157                 rc = swrap_sendmsg_before_unix(omsg, &msg,
7158                                                &scm_rights_pipe_fd);
7159                 if (rc < 0) {
7160                         return rc;
7161                 }
7162                 ret = libc_sendmsg(s, &msg, flags);
7163                 return swrap_sendmsg_after_unix(&msg, ret, scm_rights_pipe_fd);
7164         }
7165
7166         ZERO_STRUCT(un_addr);
7167
7168         tmp.iov_base = NULL;
7169         tmp.iov_len = 0;
7170
7171         ZERO_STRUCT(msg);
7172
7173         SWRAP_LOCK_SI(si);
7174
7175         if (si->connected == 0) {
7176                 msg.msg_name = omsg->msg_name;             /* optional address */
7177                 msg.msg_namelen = omsg->msg_namelen;       /* size of address */
7178         }
7179         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7180         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7181
7182         SWRAP_UNLOCK_SI(si);
7183
7184 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7185         if (omsg != NULL && omsg->msg_controllen > 0 && omsg->msg_control != NULL) {
7186                 uint8_t *cmbuf = NULL;
7187                 size_t cmlen = 0;
7188
7189                 rc = swrap_sendmsg_filter_cmsghdr(omsg, &cmbuf, &cmlen);
7190                 if (rc < 0) {
7191                         return rc;
7192                 }
7193
7194                 if (cmlen == 0) {
7195                         msg.msg_controllen = 0;
7196                         msg.msg_control = NULL;
7197                 } else {
7198                         msg.msg_control = cmbuf;
7199                         msg.msg_controllen = cmlen;
7200                 }
7201         }
7202         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7203 #endif
7204         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, &to_un, &to, &bcast);
7205         if (rc < 0) {
7206                 int saved_errno = errno;
7207 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7208                 SAFE_FREE(msg.msg_control);
7209 #endif
7210                 errno = saved_errno;
7211                 return -1;
7212         }
7213
7214         if (bcast) {
7215                 struct stat st;
7216                 unsigned int iface;
7217                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
7218                 char type;
7219                 size_t i, len = 0;
7220                 uint8_t *buf;
7221                 off_t ofs = 0;
7222                 size_t avail = 0;
7223                 size_t remain;
7224                 char *swrap_dir = NULL;
7225
7226                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7227                         avail += msg.msg_iov[i].iov_len;
7228                 }
7229
7230                 len = avail;
7231                 remain = avail;
7232
7233                 /* we capture it as one single packet */
7234                 buf = (uint8_t *)malloc(remain);
7235                 if (!buf) {
7236                         int saved_errno = errno;
7237 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7238                         SAFE_FREE(msg.msg_control);
7239 #endif
7240                         errno = saved_errno;
7241                         return -1;
7242                 }
7243
7244                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7245                         size_t this_time = MIN(remain, (size_t)msg.msg_iov[i].iov_len);
7246                         memcpy(buf + ofs,
7247                                msg.msg_iov[i].iov_base,
7248                                this_time);
7249                         ofs += this_time;
7250                         remain -= this_time;
7251                 }
7252
7253                 type = SOCKET_TYPE_CHAR_UDP;
7254
7255                 swrap_dir = socket_wrapper_dir();
7256                 if (swrap_dir == NULL) {
7257                         int saved_errno = errno;
7258 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7259                         SAFE_FREE(msg.msg_control);
7260 #endif
7261                         SAFE_FREE(buf);
7262                         errno = saved_errno;
7263                         return -1;
7264                 }
7265
7266                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
7267                         swrap_un_path(&un_addr, swrap_dir, type, iface, prt);
7268                         if (stat(un_addr.sun_path, &st) != 0) continue;
7269
7270                         msg.msg_name = &un_addr;           /* optional address */
7271                         msg.msg_namelen = sizeof(un_addr); /* size of address */
7272
7273                         /* ignore the any errors in broadcast sends */
7274                         libc_sendmsg(s, &msg, flags);
7275                 }
7276
7277                 SAFE_FREE(swrap_dir);
7278
7279                 SWRAP_LOCK_SI(si);
7280
7281                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
7282                 free(buf);
7283
7284                 SWRAP_UNLOCK_SI(si);
7285
7286                 return len;
7287         }
7288
7289         ret = libc_sendmsg(s, &msg, flags);
7290
7291         swrap_sendmsg_after(s, si, &msg, to, ret);
7292
7293 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7294         {
7295                 int saved_errno = errno;
7296                 SAFE_FREE(msg.msg_control);
7297                 errno = saved_errno;
7298         }
7299 #endif
7300
7301         return ret;
7302 }
7303
/*
 * Exported sendmsg() override: hands the call to swrap_sendmsg() and
 * returns its result unchanged.
 */
ssize_t sendmsg(int s, const struct msghdr *omsg, int flags)
{
	ssize_t nsent = swrap_sendmsg(s, omsg, flags);

	return nsent;
}
7308
7309 /****************************************************************************
7310  *   READV
7311  ***************************************************************************/
7312
7313 static ssize_t swrap_readv(int s, const struct iovec *vector, int count)
7314 {
7315         struct socket_info *si;
7316         struct msghdr msg;
7317         struct iovec tmp;
7318         struct swrap_address saddr = {
7319                 .sa_socklen = sizeof(struct sockaddr_storage)
7320         };
7321         ssize_t ret;
7322         int rc;
7323
7324         si = find_socket_info(s);
7325         if (si == NULL) {
7326                 return libc_readv(s, vector, count);
7327         }
7328
7329         tmp.iov_base = NULL;
7330         tmp.iov_len = 0;
7331
7332         ZERO_STRUCT(msg);
7333         msg.msg_name = &saddr.sa.s; /* optional address */
7334         msg.msg_namelen = saddr.sa_socklen;      /* size of address */
7335         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7336         msg.msg_iovlen = count;        /* # elements in msg_iov */
7337 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7338         msg.msg_control = NULL;        /* ancillary data, see below */
7339         msg.msg_controllen = 0;        /* ancillary data buffer len */
7340         msg.msg_flags = 0;             /* flags on received message */
7341 #endif
7342
7343         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7344         if (rc < 0) {
7345                 if (rc == -ENOTSOCK) {
7346                         return libc_readv(s, vector, count);
7347                 }
7348                 return -1;
7349         }
7350
7351         ret = libc_readv(s, msg.msg_iov, msg.msg_iovlen);
7352
7353         rc = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
7354         if (rc != 0) {
7355                 return rc;
7356         }
7357
7358         return ret;
7359 }
7360
/*
 * Exported readv() override: hands the call to swrap_readv() and returns
 * its result unchanged.
 */
ssize_t readv(int s, const struct iovec *vector, int count)
{
	ssize_t nread = swrap_readv(s, vector, count);

	return nread;
}
7365
7366 /****************************************************************************
7367  *   WRITEV
7368  ***************************************************************************/
7369
7370 static ssize_t swrap_writev(int s, const struct iovec *vector, int count)
7371 {
7372         struct msghdr msg;
7373         struct iovec tmp;
7374         struct sockaddr_un un_addr;
7375         ssize_t ret;
7376         int rc;
7377         struct socket_info *si = find_socket_info(s);
7378
7379         if (!si) {
7380                 return libc_writev(s, vector, count);
7381         }
7382
7383         tmp.iov_base = NULL;
7384         tmp.iov_len = 0;
7385
7386         ZERO_STRUCT(msg);
7387         msg.msg_name = NULL;           /* optional address */
7388         msg.msg_namelen = 0;           /* size of address */
7389         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7390         msg.msg_iovlen = count;        /* # elements in msg_iov */
7391 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7392         msg.msg_control = NULL;        /* ancillary data, see below */
7393         msg.msg_controllen = 0;        /* ancillary data buffer len */
7394         msg.msg_flags = 0;             /* flags on received message */
7395 #endif
7396
7397         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
7398         if (rc < 0) {
7399                 if (rc == -ENOTSOCK) {
7400                         return libc_readv(s, vector, count);
7401                 }
7402                 return -1;
7403         }
7404
7405         ret = libc_writev(s, msg.msg_iov, msg.msg_iovlen);
7406
7407         swrap_sendmsg_after(s, si, &msg, NULL, ret);
7408
7409         return ret;
7410 }
7411
/*
 * Exported writev() override: hands the call to swrap_writev() and
 * returns its result unchanged.
 */
ssize_t writev(int s, const struct iovec *vector, int count)
{
	ssize_t nwritten = swrap_writev(s, vector, count);

	return nwritten;
}
7416
7417 /****************************
7418  * CLOSE
7419  ***************************/
7420
7421 static int swrap_remove_wrapper(const char *__func_name,
7422                                 int (*__close_fd_fn)(int fd),
7423                                 int fd)
7424 {
7425         struct socket_info *si = NULL;
7426         int si_index;
7427         int ret_errno = errno;
7428         int ret;
7429
7430         swrap_mutex_lock(&socket_reset_mutex);
7431
7432         si_index = find_socket_info_index(fd);
7433         if (si_index == -1) {
7434                 swrap_mutex_unlock(&socket_reset_mutex);
7435                 return __close_fd_fn(fd);
7436         }
7437
7438         swrap_log(SWRAP_LOG_TRACE, __func_name, "Remove wrapper for fd=%d", fd);
7439         reset_socket_info_index(fd);
7440
7441         si = swrap_get_socket_info(si_index);
7442
7443         swrap_mutex_lock(&first_free_mutex);
7444         SWRAP_LOCK_SI(si);
7445
7446         ret = __close_fd_fn(fd);
7447         if (ret == -1) {
7448                 ret_errno = errno;
7449         }
7450
7451         swrap_dec_refcount(si);
7452
7453         if (swrap_get_refcount(si) > 0) {
7454                 /* there are still references left */
7455                 goto out;
7456         }
7457
7458         if (si->fd_passed) {
7459                 goto set_next_free;
7460         }
7461
7462         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
7463                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_SEND, NULL, 0);
7464         }
7465
7466         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
7467                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_RECV, NULL, 0);
7468                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_ACK, NULL, 0);
7469         }
7470
7471         if (si->un_addr.sun_path[0] != '\0') {
7472                 unlink(si->un_addr.sun_path);
7473         }
7474
7475 set_next_free:
7476         swrap_set_next_free(si, first_free);
7477         first_free = si_index;
7478
7479 out:
7480         SWRAP_UNLOCK_SI(si);
7481         swrap_mutex_unlock(&first_free_mutex);
7482         swrap_mutex_unlock(&socket_reset_mutex);
7483
7484         errno = ret_errno;
7485         return ret;
7486 }
7487
/*
 * Close callback that leaves 'fd' untouched and always reports success.
 * It is passed to swrap_remove_wrapper() when only the bookkeeping for a
 * descriptor should be dropped.
 */
static int swrap_noop_close(int fd)
{
	const int success = 0;

	(void)fd; /* intentionally ignored */

	return success;
}
7493
7494 static void swrap_remove_stale(int fd)
7495 {
7496         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
7497 }
7498
7499 /*
7500  * This allows socket_wrapper aware applications to
7501  * indicate that the given fd does not belong to
7502  * an inet socket.
7503  *
7504  * We already overload a lot of unrelated functions
7505  * like eventfd(), timerfd_create(), ... in order to
7506  * call swrap_remove_stale() on the returned fd, but
7507  * we'll never be able to handle all possible syscalls.
7508  *
7509  * socket_wrapper_indicate_no_inet_fd() gives them a way
7510  * to do the same.
7511  *
7512  * We don't export swrap_remove_stale() in order to
7513  * make it easier to analyze SOCKET_WRAPPER_DEBUGLEVEL=3
7514  * log files.
7515  */
7516 void socket_wrapper_indicate_no_inet_fd(int fd)
7517 {
7518         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
7519 }
7520
7521 static int swrap_close(int fd)
7522 {
7523         return swrap_remove_wrapper(__func__, libc_close, fd);
7524 }
7525
/*
 * Exported close() override: hands the call to swrap_close() and returns
 * its result unchanged.
 */
int close(int fd)
{
	int rc = swrap_close(fd);

	return rc;
}
7530
#ifdef HAVE___CLOSE_NOCANCEL

/*
 * __close_nocancel() implementation: removes the wrapper state for 'fd'
 * and closes the descriptor with libc___close_nocancel().
 */
static int swrap___close_nocancel(int fd)
{
	int rc = swrap_remove_wrapper(__func__, libc___close_nocancel, fd);

	return rc;
}

/* Declared here so that the definition below has a prototype. */
int __close_nocancel(int fd);

/* Exported __close_nocancel() override. */
int __close_nocancel(int fd)
{
	int rc = swrap___close_nocancel(fd);

	return rc;
}

#endif /* HAVE___CLOSE_NOCANCEL */
7545
7546 /****************************
7547  * DUP
7548  ***************************/
7549
7550 static int swrap_dup(int fd)
7551 {
7552         struct socket_info *si;
7553         int dup_fd, idx;
7554
7555         idx = find_socket_info_index(fd);
7556         if (idx == -1) {
7557                 return libc_dup(fd);
7558         }
7559
7560         si = swrap_get_socket_info(idx);
7561
7562         dup_fd = libc_dup(fd);
7563         if (dup_fd == -1) {
7564                 int saved_errno = errno;
7565                 errno = saved_errno;
7566                 return -1;
7567         }
7568
7569         if ((size_t)dup_fd >= socket_fds_max) {
7570                 SWRAP_LOG(SWRAP_LOG_ERROR,
7571                           "The max socket index limit of %zu has been reached, "
7572                           "trying to add %d",
7573                           socket_fds_max,
7574                           dup_fd);
7575                 libc_close(dup_fd);
7576                 errno = EMFILE;
7577                 return -1;
7578         }
7579
7580         SWRAP_LOCK_SI(si);
7581
7582         swrap_inc_refcount(si);
7583
7584         SWRAP_UNLOCK_SI(si);
7585
7586         /* Make sure we don't have an entry for the fd */
7587         swrap_remove_stale(dup_fd);
7588
7589         set_socket_info_index(dup_fd, idx);
7590
7591         return dup_fd;
7592 }
7593
/*
 * Exported dup() override: hands the call to swrap_dup() and returns its
 * result unchanged.
 */
int dup(int fd)
{
	int new_fd = swrap_dup(fd);

	return new_fd;
}
7598
7599 /****************************
7600  * DUP2
7601  ***************************/
7602
7603 static int swrap_dup2(int fd, int newfd)
7604 {
7605         struct socket_info *si;
7606         int dup_fd, idx;
7607
7608         idx = find_socket_info_index(fd);
7609         if (idx == -1) {
7610                 return libc_dup2(fd, newfd);
7611         }
7612
7613         si = swrap_get_socket_info(idx);
7614
7615         if (fd == newfd) {
7616                 /*
7617                  * According to the manpage:
7618                  *
7619                  * "If oldfd is a valid file descriptor, and newfd has the same
7620                  * value as oldfd, then dup2() does nothing, and returns newfd."
7621                  */
7622                 return newfd;
7623         }
7624
7625         if ((size_t)newfd >= socket_fds_max) {
7626                 SWRAP_LOG(SWRAP_LOG_ERROR,
7627                           "The max socket index limit of %zu has been reached, "
7628                           "trying to add %d",
7629                           socket_fds_max,
7630                           newfd);
7631                 errno = EMFILE;
7632                 return -1;
7633         }
7634
7635         if (find_socket_info(newfd)) {
7636                 /* dup2() does an implicit close of newfd, which we
7637                  * need to emulate */
7638                 swrap_close(newfd);
7639         }
7640
7641         dup_fd = libc_dup2(fd, newfd);
7642         if (dup_fd == -1) {
7643                 int saved_errno = errno;
7644                 errno = saved_errno;
7645                 return -1;
7646         }
7647
7648         SWRAP_LOCK_SI(si);
7649
7650         swrap_inc_refcount(si);
7651
7652         SWRAP_UNLOCK_SI(si);
7653
7654         /* Make sure we don't have an entry for the fd */
7655         swrap_remove_stale(dup_fd);
7656
7657         set_socket_info_index(dup_fd, idx);
7658
7659         return dup_fd;
7660 }
7661
/*
 * Exported dup2() override: hands the call to swrap_dup2() and returns
 * its result unchanged.
 */
int dup2(int fd, int newfd)
{
	int new_fd = swrap_dup2(fd, newfd);

	return new_fd;
}
7666
7667 /****************************
7668  * FCNTL
7669  ***************************/
7670
7671 static int swrap_vfcntl(int fd, int cmd, va_list va)
7672 {
7673         struct socket_info *si;
7674         int rc, dup_fd, idx;
7675
7676         idx = find_socket_info_index(fd);
7677         if (idx == -1) {
7678                 return libc_vfcntl(fd, cmd, va);
7679         }
7680
7681         si = swrap_get_socket_info(idx);
7682
7683         switch (cmd) {
7684         case F_DUPFD:
7685                 dup_fd = libc_vfcntl(fd, cmd, va);
7686                 if (dup_fd == -1) {
7687                         int saved_errno = errno;
7688                         errno = saved_errno;
7689                         return -1;
7690                 }
7691
7692                 /* Make sure we don't have an entry for the fd */
7693                 swrap_remove_stale(dup_fd);
7694
7695                 if ((size_t)dup_fd >= socket_fds_max) {
7696                         SWRAP_LOG(SWRAP_LOG_ERROR,
7697                           "The max socket index limit of %zu has been reached, "
7698                           "trying to add %d",
7699                           socket_fds_max,
7700                           dup_fd);
7701                         libc_close(dup_fd);
7702                         errno = EMFILE;
7703                         return -1;
7704                 }
7705
7706                 SWRAP_LOCK_SI(si);
7707
7708                 swrap_inc_refcount(si);
7709
7710                 SWRAP_UNLOCK_SI(si);
7711
7712
7713                 set_socket_info_index(dup_fd, idx);
7714
7715                 rc = dup_fd;
7716                 break;
7717         default:
7718                 rc = libc_vfcntl(fd, cmd, va);
7719                 break;
7720         }
7721
7722         return rc;
7723 }
7724
/*
 * Exported fcntl() override: collects the variadic argument list and
 * forwards it to swrap_vfcntl().
 */
int fcntl(int fd, int cmd, ...)
{
	va_list args;
	int result;

	va_start(args, cmd);
	result = swrap_vfcntl(fd, cmd, args);
	va_end(args);

	return result;
}
7738
7739 /****************************
7740  * EVENTFD
7741  ***************************/
7742
7743 #ifdef HAVE_EVENTFD
/*
 * eventfd() implementation: creates the descriptor with libc_eventfd()
 * and, on success, drops any stale socket_info entry recorded for the
 * returned fd number.
 */
static int swrap_eventfd(int count, int flags)
{
	int efd = libc_eventfd(count, flags);

	if (efd == -1) {
		return efd;
	}

	swrap_remove_stale(efd);

	return efd;
}
7755
/*
 * Exported eventfd() override. The type of 'count' follows the platform
 * prototype selected by HAVE_EVENTFD_UNSIGNED_INT.
 */
#ifdef HAVE_EVENTFD_UNSIGNED_INT
int eventfd(unsigned int count, int flags)
#else
int eventfd(int count, int flags)
#endif
{
	int efd = swrap_eventfd(count, flags);

	return efd;
}
7764 #endif
7765
7766 #ifdef HAVE_PLEDGE
/*
 * pledge() override that ignores both arguments and always reports
 * success.
 */
int pledge(const char *promises, const char *paths[])
{
	const int success = 0;

	/* neither argument is looked at */
	(void)promises;
	(void)paths;

	return success;
}
7774 #endif /* HAVE_PLEDGE */
7775
7776 static void swrap_thread_prepare(void)
7777 {
7778         /*
7779          * This function should only be called here!!
7780          *
7781          * We bind all symobls to avoid deadlocks of the fork is
7782          * interrupted by a signal handler using a symbol of this
7783          * library.
7784          */
7785         swrap_bind_symbol_all();
7786
7787         SWRAP_LOCK_ALL;
7788 }
7789
7790 static void swrap_thread_parent(void)
7791 {
7792         SWRAP_UNLOCK_ALL;
7793 }
7794
7795 static void swrap_thread_child(void)
7796 {
7797         SWRAP_REINIT_ALL;
7798 }
7799
7800 /****************************
7801  * CONSTRUCTOR
7802  ***************************/
7803 void swrap_constructor(void)
7804 {
7805         if (PIPE_BUF < sizeof(struct swrap_unix_scm_rights)) {
7806                 SWRAP_LOG(SWRAP_LOG_ERROR,
7807                           "PIPE_BUF=%zu < "
7808                           "sizeof(struct swrap_unix_scm_rights)=%zu\n"
7809                           "sizeof(struct swrap_unix_scm_rights_payload)=%zu "
7810                           "sizeof(struct socket_info)=%zu",
7811                           (size_t)PIPE_BUF,
7812                           sizeof(struct swrap_unix_scm_rights),
7813                           sizeof(struct swrap_unix_scm_rights_payload),
7814                           sizeof(struct socket_info));
7815                 exit(-1);
7816         }
7817
7818         SWRAP_REINIT_ALL;
7819
7820         /*
7821         * If we hold a lock and the application forks, then the child
7822         * is not able to unlock the mutex and we are in a deadlock.
7823         * This should prevent such deadlocks.
7824         */
7825         pthread_atfork(&swrap_thread_prepare,
7826                        &swrap_thread_parent,
7827                        &swrap_thread_child);
7828 }
7829
7830 /****************************
7831  * DESTRUCTOR
7832  ***************************/
7833
7834 /*
7835  * This function is called when the library is unloaded and makes sure that
7836  * sockets get closed and the unix file for the socket are unlinked.
7837  */
7838 void swrap_destructor(void)
7839 {
7840         size_t i;
7841
7842         if (socket_fds_idx != NULL) {
7843                 for (i = 0; i < socket_fds_max; ++i) {
7844                         if (socket_fds_idx[i] != -1) {
7845                                 swrap_close(i);
7846                         }
7847                 }
7848                 SAFE_FREE(socket_fds_idx);
7849         }
7850
7851         SAFE_FREE(sockets);
7852
7853         if (swrap.libc.handle != NULL
7854 #ifdef RTLD_NEXT
7855             && swrap.libc.handle != RTLD_NEXT
7856 #endif
7857                         ) {
7858                 dlclose(swrap.libc.handle);
7859         }
7860         if (swrap.libc.socket_handle
7861 #ifdef RTLD_NEXT
7862             && swrap.libc.socket_handle != RTLD_NEXT
7863 #endif
7864                         ) {
7865                 dlclose(swrap.libc.socket_handle);
7866         }
7867 }
7868
#if defined(HAVE__SOCKET) && defined(HAVE__CLOSE)
/*
 * On FreeBSD 12 (and maybe other platforms) system libraries like
 * libresolv prefix their syscalls with '_' in order to always use the
 * symbols from libc.
 *
 * For the interaction with resolv_wrapper, socket wrapper has to be
 * injected into libresolv, which means all socket related syscalls
 * need to be provided with the '_' prefix as well.
 *
 * This is tested in Samba's 'make test'. There we noticed that
 * providing '_read', '_open' and '_close' would cause errors, so
 * '_read', '_write' and all non socket related calls are skipped
 * without further analyzing the problem.
 */
#define SWRAP_SYMBOL_ALIAS(sym, aliassym) \
	extern __typeof__(sym) aliassym __attribute__ ((alias(#sym)))

#ifdef HAVE_ACCEPT4
SWRAP_SYMBOL_ALIAS(accept4, _accept4);
#endif
SWRAP_SYMBOL_ALIAS(accept, _accept);
SWRAP_SYMBOL_ALIAS(bind, _bind);
SWRAP_SYMBOL_ALIAS(connect, _connect);
SWRAP_SYMBOL_ALIAS(dup, _dup);
SWRAP_SYMBOL_ALIAS(dup2, _dup2);
SWRAP_SYMBOL_ALIAS(fcntl, _fcntl);
SWRAP_SYMBOL_ALIAS(getpeername, _getpeername);
SWRAP_SYMBOL_ALIAS(getsockname, _getsockname);
SWRAP_SYMBOL_ALIAS(getsockopt, _getsockopt);
SWRAP_SYMBOL_ALIAS(ioctl, _ioctl);
SWRAP_SYMBOL_ALIAS(listen, _listen);
SWRAP_SYMBOL_ALIAS(readv, _readv);
SWRAP_SYMBOL_ALIAS(recv, _recv);
SWRAP_SYMBOL_ALIAS(recvfrom, _recvfrom);
SWRAP_SYMBOL_ALIAS(recvmsg, _recvmsg);
SWRAP_SYMBOL_ALIAS(send, _send);
SWRAP_SYMBOL_ALIAS(sendmsg, _sendmsg);
SWRAP_SYMBOL_ALIAS(sendto, _sendto);
SWRAP_SYMBOL_ALIAS(setsockopt, _setsockopt);
SWRAP_SYMBOL_ALIAS(socket, _socket);
SWRAP_SYMBOL_ALIAS(socketpair, _socketpair);
SWRAP_SYMBOL_ALIAS(writev, _writev);

#endif /* HAVE__SOCKET && HAVE__CLOSE */