src/socket_wrapper.c: implement recvmmsg and sendmmsg
[socket_wrapper.git] / src / socket_wrapper.c
1 /*
2  * BSD 3-Clause License
3  *
4  * Copyright (c) 2005-2008, Jelmer Vernooij <jelmer@samba.org>
5  * Copyright (c) 2006-2021, Stefan Metzmacher <metze@samba.org>
6  * Copyright (c) 2013-2021, Andreas Schneider <asn@samba.org>
7  * Copyright (c) 2014-2017, Michael Adam <obnox@samba.org>
8  * Copyright (c) 2016-2018, Anoop C S <anoopcs@redhat.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  *
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  *
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * 3. Neither the name of the author nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38
39 /*
40    Socket wrapper library. Passes all socket communication over
41    unix domain sockets if the environment variable SOCKET_WRAPPER_DIR
42    is set.
43 */
44
45 #include "config.h"
46
47 #include <sys/types.h>
48 #include <sys/time.h>
49 #include <sys/stat.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #ifdef HAVE_SYS_FILIO_H
53 #include <sys/filio.h>
54 #endif
55 #ifdef HAVE_SYS_SIGNALFD_H
56 #include <sys/signalfd.h>
57 #endif
58 #ifdef HAVE_SYS_EVENTFD_H
59 #include <sys/eventfd.h>
60 #endif
61 #ifdef HAVE_SYS_TIMERFD_H
62 #include <sys/timerfd.h>
63 #endif
64 #include <sys/uio.h>
65 #include <errno.h>
66 #include <sys/un.h>
67 #include <netinet/in.h>
68 #include <netinet/tcp.h>
69 #ifdef HAVE_NETINET_TCP_FSM_H
70 #include <netinet/tcp_fsm.h>
71 #endif
72 #include <arpa/inet.h>
73 #include <fcntl.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <stdio.h>
77 #include <stdint.h>
78 #include <stdarg.h>
79 #include <stdbool.h>
80 #include <unistd.h>
81 #ifdef HAVE_GNU_LIB_NAMES_H
82 #include <gnu/lib-names.h>
83 #endif
84 #ifdef HAVE_RPC_RPC_H
85 #include <rpc/rpc.h>
86 #endif
87 #include <pthread.h>
88
89 #include "socket_wrapper.h"
90
/*
 * Severity levels accepted by swrap_log(), ordered from least verbose
 * (errors) to most verbose (trace). swrap_log() compares the level of a
 * message numerically against the value of SOCKET_WRAPPER_DEBUGLEVEL.
 */
enum swrap_dbglvl_e {
        SWRAP_LOG_ERROR = 0,
        SWRAP_LOG_WARN,
        SWRAP_LOG_DEBUG,
        SWRAP_LOG_TRACE
};
97
/* GCC can check printf-style format strings via a function attribute. */
99 #ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
100 #define PRINTF_ATTRIBUTE(a,b) __attribute__ ((__format__ (__printf__, a, b)))
101 #else
102 #define PRINTF_ATTRIBUTE(a,b)
103 #endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */
104
105 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
106 #define CONSTRUCTOR_ATTRIBUTE __attribute__ ((constructor))
107 #else
108 #define CONSTRUCTOR_ATTRIBUTE
109 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
110
111 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
112 #define DESTRUCTOR_ATTRIBUTE __attribute__ ((destructor))
113 #else
114 #define DESTRUCTOR_ATTRIBUTE
115 #endif
116
117 #ifndef FALL_THROUGH
118 # ifdef HAVE_FALLTHROUGH_ATTRIBUTE
119 #  define FALL_THROUGH __attribute__ ((fallthrough))
120 # else /* HAVE_FALLTHROUGH_ATTRIBUTE */
121 #  define FALL_THROUGH ((void)0)
122 # endif /* HAVE_FALLTHROUGH_ATTRIBUTE */
123 #endif /* FALL_THROUGH */
124
125 #ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
126 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE __attribute__((no_sanitize_address))
127 #else
128 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
129 #endif
130
131 #ifdef HAVE_GCC_THREAD_LOCAL_STORAGE
132 # define SWRAP_THREAD __thread
133 #else
134 # define SWRAP_THREAD
135 #endif
136
137 #ifndef MIN
138 #define MIN(a,b) ((a)<(b)?(a):(b))
139 #endif
140
141 #ifndef ZERO_STRUCT
142 #define ZERO_STRUCT(x) memset((char *)&(x), 0, sizeof(x))
143 #endif
144
145 #ifndef ZERO_STRUCTP
146 #define ZERO_STRUCTP(x) do { \
147                 if ((x) != NULL) \
148                         memset((char *)(x), 0, sizeof(*(x))); \
149         } while(0)
150 #endif
151
152 #ifndef SAFE_FREE
153 #define SAFE_FREE(x) do { if ((x) != NULL) {free(x); (x)=NULL;} } while(0)
154 #endif
155
156 #ifndef discard_const
157 #define discard_const(ptr) ((void *)((uintptr_t)(ptr)))
158 #endif
159
160 #ifndef discard_const_p
161 #define discard_const_p(type, ptr) ((type *)discard_const(ptr))
162 #endif
163
164 #define UNUSED(x) (void)(x)
165
166 #ifdef IPV6_PKTINFO
167 # ifndef IPV6_RECVPKTINFO
168 #  define IPV6_RECVPKTINFO IPV6_PKTINFO
169 # endif /* IPV6_RECVPKTINFO */
170 #endif /* IPV6_PKTINFO */
171
172 /*
173  * On BSD IP_PKTINFO has a different name because during
174  * the time when they implemented it, there was no RFC.
175  * The name for IPv6 is the same as on Linux.
176  */
177 #ifndef IP_PKTINFO
178 # ifdef IP_RECVDSTADDR
179 #  define IP_PKTINFO IP_RECVDSTADDR
180 # endif
181 #endif
182
/*
 * Initialize a mutex via _socket_wrapper_init_mutex(), passing the
 * stringified argument expression along as the mutex name.
 */
#define socket_wrapper_init_mutex(m) \
        _socket_wrapper_init_mutex(m, #m)

/* Add new global locks here please */
/*
 * (Re-)initialize every global mutex. The process exits with -1 as soon
 * as one initialization reports a non-zero result.
 */
# define SWRAP_REINIT_ALL do { \
        int ret; \
        ret = socket_wrapper_init_mutex(&sockets_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&socket_reset_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&first_free_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&sockets_si_global); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&autobind_start_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&pcap_dump_mutex); \
        if (ret != 0) exit(-1); \
        ret = socket_wrapper_init_mutex(&mtu_update_mutex); \
        if (ret != 0) exit(-1); \
} while(0)

/*
 * Take all global mutexes in one fixed order. SWRAP_UNLOCK_ALL releases
 * them in exactly the reverse order; keep both lists in sync.
 */
# define SWRAP_LOCK_ALL do { \
        swrap_mutex_lock(&sockets_mutex); \
        swrap_mutex_lock(&socket_reset_mutex); \
        swrap_mutex_lock(&first_free_mutex); \
        swrap_mutex_lock(&sockets_si_global); \
        swrap_mutex_lock(&autobind_start_mutex); \
        swrap_mutex_lock(&pcap_dump_mutex); \
        swrap_mutex_lock(&mtu_update_mutex); \
} while(0)

/* Release all global mutexes, in the reverse of the SWRAP_LOCK_ALL order. */
# define SWRAP_UNLOCK_ALL do { \
        swrap_mutex_unlock(&mtu_update_mutex); \
        swrap_mutex_unlock(&pcap_dump_mutex); \
        swrap_mutex_unlock(&autobind_start_mutex); \
        swrap_mutex_unlock(&sockets_si_global); \
        swrap_mutex_unlock(&first_free_mutex); \
        swrap_mutex_unlock(&socket_reset_mutex); \
        swrap_mutex_unlock(&sockets_mutex); \
} while(0)
224
/*
 * Reinterpret a socket_info pointer as a pointer to its enclosing
 * socket_info_container. This is only valid because 'info' is the first
 * member of struct socket_info_container.
 *
 * The whole expansion is parenthesized so that the result can be used
 * directly in a larger expression, e.g. SOCKET_INFO_CONTAINER(si)->meta.
 */
#define SOCKET_INFO_CONTAINER(si) \
        ((struct socket_info_container *)(si))

/*
 * Lock access to a socket_info entry. All entries share the single global
 * mutex sockets_si_global; a NULL 'si' aborts the process.
 */
#define SWRAP_LOCK_SI(si) do { \
        struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
        if (sic != NULL) { \
                swrap_mutex_lock(&sockets_si_global); \
        } else { \
                abort(); \
        } \
} while(0)

/*
 * Counterpart of SWRAP_LOCK_SI: release sockets_si_global. A NULL 'si'
 * aborts the process.
 */
#define SWRAP_UNLOCK_SI(si) do { \
        struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \
        if (sic != NULL) { \
                swrap_mutex_unlock(&sockets_si_global); \
        } else { \
                abort(); \
        } \
} while(0)
245
246 #if defined(HAVE_GETTIMEOFDAY_TZ) || defined(HAVE_GETTIMEOFDAY_TZ_VOID)
247 #define swrapGetTimeOfDay(tval) gettimeofday(tval,NULL)
248 #else
249 #define swrapGetTimeOfDay(tval) gettimeofday(tval)
250 #endif
251
252 /* we need to use a very terse format here as IRIX 6.4 silently
253    truncates names to 16 chars, so if we use a longer name then we
254    can't tell which port a packet came from with recvfrom()
255
256    with this format we have 8 chars left for the directory name
257 */
258 #define SOCKET_FORMAT "%c%02X%04X"
259 #define SOCKET_TYPE_CHAR_TCP            'T'
260 #define SOCKET_TYPE_CHAR_UDP            'U'
261 #define SOCKET_TYPE_CHAR_TCP_V6         'X'
262 #define SOCKET_TYPE_CHAR_UDP_V6         'Y'
263
264 /*
 * Set the packet MTU to 1500 bytes for stream sockets to make it easier to
266  * format PCAP capture files (as the caller will simply continue from here).
267  */
268 #define SOCKET_WRAPPER_MTU_DEFAULT 1500
269 #define SOCKET_WRAPPER_MTU_MIN     512
270 #define SOCKET_WRAPPER_MTU_MAX     32768
271
272 #define SOCKET_MAX_SOCKETS 1024
273
274 /*
275  * Maximum number of socket_info structures that can
 * be used. Can be overridden by the environment variable
277  * SOCKET_WRAPPER_MAX_SOCKETS.
278  */
279 #define SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT 65535
280
281 #define SOCKET_WRAPPER_MAX_SOCKETS_LIMIT 262140
282
283 /* This limit is to avoid broadcast sendto() needing to stat too many
284  * files.  It may be raised (with a performance cost) to up to 254
285  * without changing the format above */
286 #define MAX_WRAPPED_INTERFACES 64
287
/*
 * A socket address bundled with a length field. The union overlays the
 * generic sockaddr, the IPv4, (optionally) IPv6 and unix-domain variants
 * and sockaddr_storage on the same memory.
 */
struct swrap_address {
        /* presumably the number of valid bytes in 'sa' -- TODO confirm */
        socklen_t sa_socklen;
        union {
                struct sockaddr s;
                struct sockaddr_in in;
#ifdef HAVE_IPV6
                struct sockaddr_in6 in6;
#endif
                struct sockaddr_un un;
                struct sockaddr_storage ss;
        } sa;
};
300
301 static int first_free;
302
/*
 * Bookkeeping record for one socket. It is embedded as the first member
 * of struct socket_info_container.
 * NOTE(review): the exact meaning of the individual int flags is not
 * visible in this part of the file; see the code that sets them.
 */
struct socket_info
{
        /*
         * Remember to update swrap_unix_scm_right_magic
         * on any change.
         */

        int family;
        int type;
        int protocol;
        int bound;
        int bcast;
        int is_server;
        int connected;
        int defer_connect;
        int pktinfo;
        int tcp_nodelay;
        int listening;
        int fd_passed;

        /* The unix path so we can unlink it on close() */
        struct sockaddr_un un_addr;

        struct swrap_address bindname;
        struct swrap_address myname;
        struct swrap_address peername;

        /* presumably counters of packets sent/received -- TODO confirm */
        struct {
                unsigned long pck_snd;
                unsigned long pck_rcv;
        } io;
};
335
/*
 * Management data stored next to each socket_info inside a
 * socket_info_container.
 */
struct socket_info_meta
{
        /* presumably the number of fds referring to this entry -- TODO confirm */
        unsigned int refcount;
        /* presumably the index of the next unused entry (free list) -- TODO confirm */
        int next_free;
        /*
         * As long as we don't use shared memory
         * for the sockets array, we use
         * sockets_si_global as a single mutex.
         *
         * pthread_mutex_t mutex;
         */
};
348
/*
 * One element of the global 'sockets' array: the socket state plus its
 * management data. 'info' must stay the first member, because
 * SOCKET_INFO_CONTAINER() casts a socket_info pointer directly to a
 * container pointer.
 */
struct socket_info_container
{
        struct socket_info info;
        struct socket_info_meta meta;
};
354
355 static struct socket_info_container *sockets;
356
357 static size_t socket_info_max = 0;
358
359 /*
360  * Allocate the socket array always on the limit value. We want it to be
361  * at least bigger than the default so if we reach the limit we can
362  * still deal with duplicate fds pointing to the same socket_info.
363  */
364 static size_t socket_fds_max = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
365
366 /* Hash table to map fds to corresponding socket_info index */
367 static int *socket_fds_idx;
368
/* Mutex for synchronizing port selection during swrap_auto_bind() */
370 static pthread_mutex_t autobind_start_mutex = PTHREAD_MUTEX_INITIALIZER;
371
372 /* Mutex to guard the initialization of array of socket_info structures */
373 static pthread_mutex_t sockets_mutex = PTHREAD_MUTEX_INITIALIZER;
374
375 /* Mutex to guard the socket reset in swrap_remove_wrapper() */
376 static pthread_mutex_t socket_reset_mutex = PTHREAD_MUTEX_INITIALIZER;
377
378 /* Mutex to synchronize access to first free index in socket_info array */
379 static pthread_mutex_t first_free_mutex = PTHREAD_MUTEX_INITIALIZER;
380
381 /*
 * Mutex to synchronize access to socket_info structures
383  * We use a single global mutex in order to avoid leaking
384  * ~ 38M copy on write memory per fork.
385  * max_sockets=65535 * sizeof(struct socket_info_container)=592 = 38796720
386  */
387 static pthread_mutex_t sockets_si_global = PTHREAD_MUTEX_INITIALIZER;
388
389 /* Mutex to synchronize access to packet capture dump file */
390 static pthread_mutex_t pcap_dump_mutex = PTHREAD_MUTEX_INITIALIZER;
391
392 /* Mutex for synchronizing mtu value fetch*/
393 static pthread_mutex_t mtu_update_mutex = PTHREAD_MUTEX_INITIALIZER;
394
395 /* Function prototypes */
396
397 #if ! defined(HAVE_CONSTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_INIT)
398 /* xlC and other oldschool compilers support (only) this */
399 #pragma init (swrap_constructor)
400 #endif
401 void swrap_constructor(void) CONSTRUCTOR_ATTRIBUTE;
402 #if ! defined(HAVE_DESTRUCTOR_ATTRIBUTE) && defined(HAVE_PRAGMA_FINI)
403 #pragma fini (swrap_destructor)
404 #endif
405 void swrap_destructor(void) DESTRUCTOR_ATTRIBUTE;
406
#ifndef HAVE_GETPROGNAME
/*
 * Fallback for platforms without getprogname(): return the short program
 * name from whichever facility the build detected, or NULL if there is
 * none.
 */
static const char *getprogname(void)
{
        const char *name = NULL;

#if defined(HAVE_PROGRAM_INVOCATION_SHORT_NAME)
        name = program_invocation_short_name;
#elif defined(HAVE_GETEXECNAME)
        name = getexecname();
#endif /* HAVE_PROGRAM_INVOCATION_SHORT_NAME */

        return name;
}
#endif /* HAVE_GETPROGNAME */
419
420 static void swrap_log(enum swrap_dbglvl_e dbglvl, const char *func, const char *format, ...) PRINTF_ATTRIBUTE(3, 4);
421 # define SWRAP_LOG(dbglvl, ...) swrap_log((dbglvl), __func__, __VA_ARGS__)
422
423 static void swrap_log(enum swrap_dbglvl_e dbglvl,
424                       const char *func,
425                       const char *format, ...)
426 {
427         char buffer[1024];
428         va_list va;
429         const char *d;
430         unsigned int lvl = 0;
431         const char *prefix = "SWRAP";
432         const char *progname = getprogname();
433
434         d = getenv("SOCKET_WRAPPER_DEBUGLEVEL");
435         if (d != NULL) {
436                 lvl = atoi(d);
437         }
438
439         if (lvl < dbglvl) {
440                 return;
441         }
442
443         va_start(va, format);
444         vsnprintf(buffer, sizeof(buffer), format, va);
445         va_end(va);
446
447         switch (dbglvl) {
448                 case SWRAP_LOG_ERROR:
449                         prefix = "SWRAP_ERROR";
450                         break;
451                 case SWRAP_LOG_WARN:
452                         prefix = "SWRAP_WARN";
453                         break;
454                 case SWRAP_LOG_DEBUG:
455                         prefix = "SWRAP_DEBUG";
456                         break;
457                 case SWRAP_LOG_TRACE:
458                         prefix = "SWRAP_TRACE";
459                         break;
460         }
461
462         if (progname == NULL) {
463                 progname = "<unknown>";
464         }
465
466         fprintf(stderr,
467                 "%s[%s (%u)] - %s: %s\n",
468                 prefix,
469                 progname,
470                 (unsigned int)getpid(),
471                 func,
472                 buffer);
473 }
474
475 /*********************************************************
476  * SWRAP LOADING LIBC FUNCTIONS
477  *********************************************************/
478
479 #include <dlfcn.h>
480
/*
 * Function pointer types for the libc functions that get wrapped. Each
 * type has to match the prototype of the real function, because the
 * resolved address is called through it. In particular the data transfer
 * functions (read, recv*, send*) return ssize_t, not int, and recvmsg()
 * takes a non-const msghdr because it writes to it.
 */
#ifdef HAVE_ACCEPT4
typedef int (*__libc_accept4)(int sockfd,
                              struct sockaddr *addr,
                              socklen_t *addrlen,
                              int flags);
#else
typedef int (*__libc_accept)(int sockfd,
                             struct sockaddr *addr,
                             socklen_t *addrlen);
#endif
typedef int (*__libc_bind)(int sockfd,
                           const struct sockaddr *addr,
                           socklen_t addrlen);
typedef int (*__libc_close)(int fd);
#ifdef HAVE___CLOSE_NOCANCEL
typedef int (*__libc___close_nocancel)(int fd);
#endif
typedef int (*__libc_connect)(int sockfd,
                              const struct sockaddr *addr,
                              socklen_t addrlen);
typedef int (*__libc_dup)(int fd);
typedef int (*__libc_dup2)(int oldfd, int newfd);
typedef int (*__libc_fcntl)(int fd, int cmd, ...);
typedef FILE *(*__libc_fopen)(const char *name, const char *mode);
#ifdef HAVE_FOPEN64
typedef FILE *(*__libc_fopen64)(const char *name, const char *mode);
#endif
#ifdef HAVE_EVENTFD
typedef int (*__libc_eventfd)(int count, int flags);
#endif
typedef int (*__libc_getpeername)(int sockfd,
                                  struct sockaddr *addr,
                                  socklen_t *addrlen);
typedef int (*__libc_getsockname)(int sockfd,
                                  struct sockaddr *addr,
                                  socklen_t *addrlen);
typedef int (*__libc_getsockopt)(int sockfd,
                               int level,
                               int optname,
                               void *optval,
                               socklen_t *optlen);
typedef int (*__libc_ioctl)(int d, unsigned long int request, ...);
typedef int (*__libc_listen)(int sockfd, int backlog);
typedef int (*__libc_open)(const char *pathname, int flags, ...);
#ifdef HAVE_OPEN64
typedef int (*__libc_open64)(const char *pathname, int flags, ...);
#endif /* HAVE_OPEN64 */
typedef int (*__libc_openat)(int dirfd, const char *path, int flags, ...);
typedef int (*__libc_pipe)(int pipefd[2]);
typedef ssize_t (*__libc_read)(int fd, void *buf, size_t count);
typedef ssize_t (*__libc_readv)(int fd, const struct iovec *iov, int iovcnt);
typedef ssize_t (*__libc_recv)(int sockfd, void *buf, size_t len, int flags);
typedef ssize_t (*__libc_recvfrom)(int sockfd,
                                   void *buf,
                                   size_t len,
                                   int flags,
                                   struct sockaddr *src_addr,
                                   socklen_t *addrlen);
typedef ssize_t (*__libc_recvmsg)(int sockfd, struct msghdr *msg, int flags);
#ifdef HAVE_RECVMMSG
#if defined(HAVE_RECVMMSG_SSIZE_T_CONST_TIMEOUT)
/* FreeBSD */
typedef ssize_t (*__libc_recvmmsg)(int sockfd, struct mmsghdr *msgvec, size_t vlen, int flags, const struct timespec *timeout);
#elif defined(HAVE_RECVMMSG_CONST_TIMEOUT)
/* Linux legacy glibc < 2.21 */
typedef int (*__libc_recvmmsg)(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags, const struct timespec *timeout);
#else
/* Linux glibc >= 2.21 */
typedef int (*__libc_recvmmsg)(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags, struct timespec *timeout);
#endif
#endif /* HAVE_RECVMMSG */
typedef ssize_t (*__libc_send)(int sockfd, const void *buf, size_t len, int flags);
typedef ssize_t (*__libc_sendmsg)(int sockfd, const struct msghdr *msg, int flags);
#ifdef HAVE_SENDMMSG
#if defined(HAVE_SENDMMSG_SSIZE_T)
/* FreeBSD */
typedef ssize_t (*__libc_sendmmsg)(int sockfd, struct mmsghdr *msgvec, size_t vlen, int flags);
#else
/* Linux */
typedef int (*__libc_sendmmsg)(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags);
#endif
#endif /* HAVE_SENDMMSG */
typedef ssize_t (*__libc_sendto)(int sockfd,
                                 const void *buf,
                                 size_t len,
                                 int flags,
                                 const  struct sockaddr *dst_addr,
                                 socklen_t addrlen);
typedef int (*__libc_setsockopt)(int sockfd,
                               int level,
                               int optname,
                               const void *optval,
                               socklen_t optlen);
#ifdef HAVE_SIGNALFD
typedef int (*__libc_signalfd)(int fd, const sigset_t *mask, int flags);
#endif
typedef int (*__libc_socket)(int domain, int type, int protocol);
typedef int (*__libc_socketpair)(int domain, int type, int protocol, int sv[2]);
#ifdef HAVE_TIMERFD_CREATE
typedef int (*__libc_timerfd_create)(int clockid, int flags);
#endif
typedef ssize_t (*__libc_write)(int fd, const void *buf, size_t count);
typedef ssize_t (*__libc_writev)(int fd, const struct iovec *iov, int iovcnt);
584
/*
 * Declare a member named _libc_<i> that overlays a typed function pointer
 * (.f, of type __libc_<i>) with a plain object pointer (.obj). The address
 * of a resolved symbol is stored through .obj and the function is called
 * through .f, so no cast between void * and a function pointer is written
 * out.
 */
#define SWRAP_SYMBOL_ENTRY(i) \
        union { \
                __libc_##i f; \
                void *obj; \
        } _libc_##i

/*
 * One slot per wrapped libc function. Optional functions are only present
 * when the corresponding HAVE_* macro is defined, mirroring the typedefs.
 */
struct swrap_libc_symbols {
#ifdef HAVE_ACCEPT4
        SWRAP_SYMBOL_ENTRY(accept4);
#else
        SWRAP_SYMBOL_ENTRY(accept);
#endif
        SWRAP_SYMBOL_ENTRY(bind);
        SWRAP_SYMBOL_ENTRY(close);
#ifdef HAVE___CLOSE_NOCANCEL
        SWRAP_SYMBOL_ENTRY(__close_nocancel);
#endif
        SWRAP_SYMBOL_ENTRY(connect);
        SWRAP_SYMBOL_ENTRY(dup);
        SWRAP_SYMBOL_ENTRY(dup2);
        SWRAP_SYMBOL_ENTRY(fcntl);
        SWRAP_SYMBOL_ENTRY(fopen);
#ifdef HAVE_FOPEN64
        SWRAP_SYMBOL_ENTRY(fopen64);
#endif
#ifdef HAVE_EVENTFD
        SWRAP_SYMBOL_ENTRY(eventfd);
#endif
        SWRAP_SYMBOL_ENTRY(getpeername);
        SWRAP_SYMBOL_ENTRY(getsockname);
        SWRAP_SYMBOL_ENTRY(getsockopt);
        SWRAP_SYMBOL_ENTRY(ioctl);
        SWRAP_SYMBOL_ENTRY(listen);
        SWRAP_SYMBOL_ENTRY(open);
#ifdef HAVE_OPEN64
        SWRAP_SYMBOL_ENTRY(open64);
#endif
        SWRAP_SYMBOL_ENTRY(openat);
        SWRAP_SYMBOL_ENTRY(pipe);
        SWRAP_SYMBOL_ENTRY(read);
        SWRAP_SYMBOL_ENTRY(readv);
        SWRAP_SYMBOL_ENTRY(recv);
        SWRAP_SYMBOL_ENTRY(recvfrom);
        SWRAP_SYMBOL_ENTRY(recvmsg);
#ifdef HAVE_RECVMMSG
        SWRAP_SYMBOL_ENTRY(recvmmsg);
#endif
        SWRAP_SYMBOL_ENTRY(send);
        SWRAP_SYMBOL_ENTRY(sendmsg);
#ifdef HAVE_SENDMMSG
        SWRAP_SYMBOL_ENTRY(sendmmsg);
#endif
        SWRAP_SYMBOL_ENTRY(sendto);
        SWRAP_SYMBOL_ENTRY(setsockopt);
#ifdef HAVE_SIGNALFD
        SWRAP_SYMBOL_ENTRY(signalfd);
#endif
        SWRAP_SYMBOL_ENTRY(socket);
        SWRAP_SYMBOL_ENTRY(socketpair);
#ifdef HAVE_TIMERFD_CREATE
        SWRAP_SYMBOL_ENTRY(timerfd_create);
#endif
        SWRAP_SYMBOL_ENTRY(write);
        SWRAP_SYMBOL_ENTRY(writev);
};
650
/*
 * Global loader state: the cached dlopen() handles for libc and libsocket
 * (filled in by swrap_load_lib_handle()) and the table of resolved libc
 * function pointers.
 */
struct swrap {
        struct {
                void *handle;
                void *socket_handle;
                struct swrap_libc_symbols symbols;
        } libc;
};
658
659 static struct swrap swrap;
660
661 /* prototypes */
662 static char *socket_wrapper_dir(void);
663
664 #define LIBC_NAME "libc.so"
665
/* Libraries from which symbols can be resolved. */
enum swrap_lib {
    SWRAP_LIBC,
    SWRAP_LIBSOCKET,
};

/*
 * Return a short printable name for 'lib', or "unknown" for a value that
 * is not a member of enum swrap_lib.
 */
static const char *swrap_str_lib(enum swrap_lib lib)
{
        const char *name = "unknown";

        /*
         * Deliberately no default label: the compiler can then warn when
         * a new enum value is added without being handled here.
         */
        switch (lib) {
        case SWRAP_LIBSOCKET:
                name = "libsocket";
                break;
        case SWRAP_LIBC:
                name = "libc";
                break;
        }

        return name;
}
683
684 static void *swrap_load_lib_handle(enum swrap_lib lib)
685 {
686         int flags = RTLD_LAZY;
687         void *handle = NULL;
688         int i;
689
690 #ifdef RTLD_DEEPBIND
691         const char *env_preload = getenv("LD_PRELOAD");
692         const char *env_deepbind = getenv("SOCKET_WRAPPER_DISABLE_DEEPBIND");
693         bool enable_deepbind = true;
694
695         /* Don't do a deepbind if we run with libasan */
696         if (env_preload != NULL && strlen(env_preload) < 1024) {
697                 const char *p = strstr(env_preload, "libasan.so");
698                 if (p != NULL) {
699                         enable_deepbind = false;
700                 }
701         }
702
703         if (env_deepbind != NULL && strlen(env_deepbind) >= 1) {
704                 enable_deepbind = false;
705         }
706
707         if (enable_deepbind) {
708                 flags |= RTLD_DEEPBIND;
709         }
710 #endif
711
712         switch (lib) {
713         case SWRAP_LIBSOCKET:
714 #ifdef HAVE_LIBSOCKET
715                 handle = swrap.libc.socket_handle;
716                 if (handle == NULL) {
717                         for (i = 10; i >= 0; i--) {
718                                 char soname[256] = {0};
719
720                                 snprintf(soname, sizeof(soname), "libsocket.so.%d", i);
721                                 handle = dlopen(soname, flags);
722                                 if (handle != NULL) {
723                                         break;
724                                 }
725                         }
726
727                         swrap.libc.socket_handle = handle;
728                 }
729                 break;
730 #endif
731         case SWRAP_LIBC:
732                 handle = swrap.libc.handle;
733 #ifdef LIBC_SO
734                 if (handle == NULL) {
735                         handle = dlopen(LIBC_SO, flags);
736
737                         swrap.libc.handle = handle;
738                 }
739 #endif
740                 if (handle == NULL) {
741                         for (i = 10; i >= 0; i--) {
742                                 char soname[256] = {0};
743
744                                 snprintf(soname, sizeof(soname), "libc.so.%d", i);
745                                 handle = dlopen(soname, flags);
746                                 if (handle != NULL) {
747                                         break;
748                                 }
749                         }
750
751                         swrap.libc.handle = handle;
752                 }
753                 break;
754         }
755
756         if (handle == NULL) {
757 #ifdef RTLD_NEXT
758                 handle = swrap.libc.handle = swrap.libc.socket_handle = RTLD_NEXT;
759 #else
760                 SWRAP_LOG(SWRAP_LOG_ERROR,
761                           "Failed to dlopen library: %s",
762                           dlerror());
763                 exit(-1);
764 #endif
765         }
766
767         return handle;
768 }
769
770 static void *_swrap_bind_symbol(enum swrap_lib lib, const char *fn_name)
771 {
772         void *handle;
773         void *func;
774
775         handle = swrap_load_lib_handle(lib);
776
777         func = dlsym(handle, fn_name);
778         if (func == NULL) {
779                 SWRAP_LOG(SWRAP_LOG_ERROR,
780                           "Failed to find %s: %s",
781                           fn_name,
782                           dlerror());
783                 exit(-1);
784         }
785
786         SWRAP_LOG(SWRAP_LOG_TRACE,
787                   "Loaded %s from %s",
788                   fn_name,
789                   swrap_str_lib(lib));
790
791         return func;
792 }
793
794 #define swrap_mutex_lock(m) _swrap_mutex_lock(m, #m, __func__, __LINE__)
795 static void _swrap_mutex_lock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
796 {
797         int ret;
798
799         ret = pthread_mutex_lock(mutex);
800         if (ret != 0) {
801                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't lock pthread mutex(%s) - %s",
802                           getpid(), getppid(), caller, line, name, strerror(ret));
803                 abort();
804         }
805 }
806
807 #define swrap_mutex_unlock(m) _swrap_mutex_unlock(m, #m, __func__, __LINE__)
808 static void _swrap_mutex_unlock(pthread_mutex_t *mutex, const char *name, const char *caller, unsigned line)
809 {
810         int ret;
811
812         ret = pthread_mutex_unlock(mutex);
813         if (ret != 0) {
814                 SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't unlock pthread mutex(%s) - %s",
815                           getpid(), getppid(), caller, line, name, strerror(ret));
816                 abort();
817         }
818 }
819
/*
 * These macros have a thread race condition on purpose!
 *
 * This is an optimization to avoid locking each time we check if the symbol is
 * bound.
 *
 * NOTE(review): the macro below assigns unconditionally and contains no
 * "is it bound?" check itself -- confirm how the callers serialize binding.
 *
 * _swrap_bind_symbol_generic(lib, sym_name) resolves 'sym_name' in 'lib'
 * via _swrap_bind_symbol() and stores the address in the .obj member of
 * swrap.libc.symbols._libc_<sym_name>.
 *
 * There is intentionally no semicolon after while(0): the caller writes
 * it, so each use is exactly one statement and stays valid in an unbraced
 * if/else.
 */
#define _swrap_bind_symbol_generic(lib, sym_name) do { \
        swrap.libc.symbols._libc_##sym_name.obj = \
                _swrap_bind_symbol(lib, #sym_name); \
} while(0)

/* Bind 'sym_name' from libc. */
#define swrap_bind_symbol_libc(sym_name) \
        _swrap_bind_symbol_generic(SWRAP_LIBC, sym_name)

/* Bind 'sym_name' from libsocket. */
#define swrap_bind_symbol_libsocket(sym_name) \
        _swrap_bind_symbol_generic(SWRAP_LIBSOCKET, sym_name)
836
837 static void swrap_bind_symbol_all(void);
838
839 /****************************************************************************
840  *                               IMPORTANT
841  ****************************************************************************
842  *
843  * Functions especially from libc need to be loaded individually, you can't
844  * load all at once or gdb will segfault at startup. The same applies to
 * valgrind and probably has something to do with the linker.  So we need
 * to load each function at the point it is called the first time.
847  *
848  ****************************************************************************/
849
850 #ifdef HAVE_ACCEPT4
851 static int libc_accept4(int sockfd,
852                         struct sockaddr *addr,
853                         socklen_t *addrlen,
854                         int flags)
855 {
856         swrap_bind_symbol_all();
857
858         return swrap.libc.symbols._libc_accept4.f(sockfd, addr, addrlen, flags);
859 }
860
861 #else /* HAVE_ACCEPT4 */
862
863 static int libc_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
864 {
865         swrap_bind_symbol_all();
866
867         return swrap.libc.symbols._libc_accept.f(sockfd, addr, addrlen);
868 }
869 #endif /* HAVE_ACCEPT4 */
870
871 static int libc_bind(int sockfd,
872                      const struct sockaddr *addr,
873                      socklen_t addrlen)
874 {
875         swrap_bind_symbol_all();
876
877         return swrap.libc.symbols._libc_bind.f(sockfd, addr, addrlen);
878 }
879
880 static int libc_close(int fd)
881 {
882         swrap_bind_symbol_all();
883
884         return swrap.libc.symbols._libc_close.f(fd);
885 }
886
#ifdef HAVE___CLOSE_NOCANCEL
/*
 * Run swrap_bind_symbol_all(), then forward to the stored libc
 * __close_nocancel pointer.
 */
static int libc___close_nocancel(int fd)
{
        int rc;

        swrap_bind_symbol_all();
        rc = swrap.libc.symbols._libc___close_nocancel.f(fd);

        return rc;
}
#endif /* HAVE___CLOSE_NOCANCEL */
895
896 static int libc_connect(int sockfd,
897                         const struct sockaddr *addr,
898                         socklen_t addrlen)
899 {
900         swrap_bind_symbol_all();
901
902         return swrap.libc.symbols._libc_connect.f(sockfd, addr, addrlen);
903 }
904
905 static int libc_dup(int fd)
906 {
907         swrap_bind_symbol_all();
908
909         return swrap.libc.symbols._libc_dup.f(fd);
910 }
911
912 static int libc_dup2(int oldfd, int newfd)
913 {
914         swrap_bind_symbol_all();
915
916         return swrap.libc.symbols._libc_dup2.f(oldfd, newfd);
917 }
918
919 #ifdef HAVE_EVENTFD
920 static int libc_eventfd(int count, int flags)
921 {
922         swrap_bind_symbol_all();
923
924         return swrap.libc.symbols._libc_eventfd.f(count, flags);
925 }
926 #endif
927
928 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
929 static int libc_vfcntl(int fd, int cmd, va_list ap)
930 {
931         void *arg;
932         int rc;
933
934         swrap_bind_symbol_all();
935
936         arg = va_arg(ap, void *);
937
938         rc = swrap.libc.symbols._libc_fcntl.f(fd, cmd, arg);
939
940         return rc;
941 }
942
943 static int libc_getpeername(int sockfd,
944                             struct sockaddr *addr,
945                             socklen_t *addrlen)
946 {
947         swrap_bind_symbol_all();
948
949         return swrap.libc.symbols._libc_getpeername.f(sockfd, addr, addrlen);
950 }
951
952 static int libc_getsockname(int sockfd,
953                             struct sockaddr *addr,
954                             socklen_t *addrlen)
955 {
956         swrap_bind_symbol_all();
957
958         return swrap.libc.symbols._libc_getsockname.f(sockfd, addr, addrlen);
959 }
960
961 static int libc_getsockopt(int sockfd,
962                            int level,
963                            int optname,
964                            void *optval,
965                            socklen_t *optlen)
966 {
967         swrap_bind_symbol_all();
968
969         return swrap.libc.symbols._libc_getsockopt.f(sockfd,
970                                                      level,
971                                                      optname,
972                                                      optval,
973                                                      optlen);
974 }
975
976 DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
977 static int libc_vioctl(int d, unsigned long int request, va_list ap)
978 {
979         void *arg;
980         int rc;
981
982         swrap_bind_symbol_all();
983
984         arg = va_arg(ap, void *);
985
986         rc = swrap.libc.symbols._libc_ioctl.f(d, request, arg);
987
988         return rc;
989 }
990
991 static int libc_listen(int sockfd, int backlog)
992 {
993         swrap_bind_symbol_all();
994
995         return swrap.libc.symbols._libc_listen.f(sockfd, backlog);
996 }
997
998 static FILE *libc_fopen(const char *name, const char *mode)
999 {
1000         swrap_bind_symbol_all();
1001
1002         return swrap.libc.symbols._libc_fopen.f(name, mode);
1003 }
1004
1005 #ifdef HAVE_FOPEN64
1006 static FILE *libc_fopen64(const char *name, const char *mode)
1007 {
1008         swrap_bind_symbol_all();
1009
1010         return swrap.libc.symbols._libc_fopen64.f(name, mode);
1011 }
1012 #endif /* HAVE_FOPEN64 */
1013
/*
 * Add O_LARGEFILE to *flags on builds where SIZE_MAX is 32 bits wide and
 * O_LARGEFILE is defined.  When O_PATH is defined and set in *flags the
 * flags are left untouched.  On every other build this is a no-op.
 */
static void swrap_inject_o_largefile(int *flags)
{
#if SIZE_MAX == 0xffffffffUL && defined(O_LARGEFILE)
#ifdef O_PATH
        if ((*flags & O_PATH) != 0) {
                return;
        }
#endif
        *flags |= O_LARGEFILE;
#else
        (void)flags; /* unused in this configuration */
#endif
}
1026
1027 static int libc_vopen(const char *pathname, int flags, va_list ap)
1028 {
1029         int mode = 0;
1030         int fd;
1031
1032         swrap_bind_symbol_all();
1033
1034         swrap_inject_o_largefile(&flags);
1035
1036         if (flags & O_CREAT) {
1037                 mode = va_arg(ap, int);
1038         }
1039         fd = swrap.libc.symbols._libc_open.f(pathname, flags, (mode_t)mode);
1040
1041         return fd;
1042 }
1043
/*
 * Variadic front end for libc_vopen(): packs the optional arguments into a
 * va_list and returns libc_vopen()'s result.
 */
static int libc_open(const char *pathname, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = libc_vopen(pathname, flags, args);
        va_end(args);

        return result;
}
1055
1056 #ifdef HAVE_OPEN64
1057 static int libc_vopen64(const char *pathname, int flags, va_list ap)
1058 {
1059         int mode = 0;
1060         int fd;
1061
1062         swrap_bind_symbol_all();
1063
1064         swrap_inject_o_largefile(&flags);
1065
1066         if (flags & O_CREAT) {
1067                 mode = va_arg(ap, int);
1068         }
1069         fd = swrap.libc.symbols._libc_open64.f(pathname, flags, (mode_t)mode);
1070
1071         return fd;
1072 }
1073 #endif /* HAVE_OPEN64 */
1074
1075 static int libc_vopenat(int dirfd, const char *path, int flags, va_list ap)
1076 {
1077         int mode = 0;
1078         int fd;
1079
1080         swrap_bind_symbol_all();
1081
1082         swrap_inject_o_largefile(&flags);
1083
1084         if (flags & O_CREAT) {
1085                 mode = va_arg(ap, int);
1086         }
1087         fd = swrap.libc.symbols._libc_openat.f(dirfd,
1088                                                path,
1089                                                flags,
1090                                                (mode_t)mode);
1091
1092         return fd;
1093 }
1094
1095 #if 0
/*
 * Variadic front end for libc_vopenat(): packs the optional arguments into
 * a va_list and returns libc_vopenat()'s result.
 */
static int libc_openat(int dirfd, const char *path, int flags, ...)
{
        va_list args;
        int result;

        va_start(args, flags);
        result = libc_vopenat(dirfd, path, flags, args);
        va_end(args);

        return result;
}
1107 #endif
1108
1109 static int libc_pipe(int pipefd[2])
1110 {
1111         swrap_bind_symbol_all();
1112
1113         return swrap.libc.symbols._libc_pipe.f(pipefd);
1114 }
1115
1116 static int libc_read(int fd, void *buf, size_t count)
1117 {
1118         swrap_bind_symbol_all();
1119
1120         return swrap.libc.symbols._libc_read.f(fd, buf, count);
1121 }
1122
1123 static ssize_t libc_readv(int fd, const struct iovec *iov, int iovcnt)
1124 {
1125         swrap_bind_symbol_all();
1126
1127         return swrap.libc.symbols._libc_readv.f(fd, iov, iovcnt);
1128 }
1129
1130 static int libc_recv(int sockfd, void *buf, size_t len, int flags)
1131 {
1132         swrap_bind_symbol_all();
1133
1134         return swrap.libc.symbols._libc_recv.f(sockfd, buf, len, flags);
1135 }
1136
1137 static int libc_recvfrom(int sockfd,
1138                          void *buf,
1139                          size_t len,
1140                          int flags,
1141                          struct sockaddr *src_addr,
1142                          socklen_t *addrlen)
1143 {
1144         swrap_bind_symbol_all();
1145
1146         return swrap.libc.symbols._libc_recvfrom.f(sockfd,
1147                                                    buf,
1148                                                    len,
1149                                                    flags,
1150                                                    src_addr,
1151                                                    addrlen);
1152 }
1153
1154 static int libc_recvmsg(int sockfd, struct msghdr *msg, int flags)
1155 {
1156         swrap_bind_symbol_all();
1157
1158         return swrap.libc.symbols._libc_recvmsg.f(sockfd, msg, flags);
1159 }
1160
#ifdef HAVE_RECVMMSG
/*
 * Forward to the bound recvmmsg() symbol after binding all symbols.
 *
 * The prototype differs between platforms, so the signature is selected by
 * the configure-time HAVE_RECVMMSG_* macros; the body is shared.
 */
#if defined(HAVE_RECVMMSG_SSIZE_T_CONST_TIMEOUT)
/* FreeBSD */
static ssize_t libc_recvmmsg(int sockfd,
                             struct mmsghdr *msgvec,
                             size_t vlen,
                             int flags,
                             const struct timespec *timeout)
#elif defined(HAVE_RECVMMSG_CONST_TIMEOUT)
/* Linux legacy glibc < 2.21 */
static int libc_recvmmsg(int sockfd,
                         struct mmsghdr *msgvec,
                         unsigned int vlen,
                         int flags,
                         const struct timespec *timeout)
#else
/* Linux glibc >= 2.21 */
static int libc_recvmmsg(int sockfd,
                         struct mmsghdr *msgvec,
                         unsigned int vlen,
                         int flags,
                         struct timespec *timeout)
#endif
{
        swrap_bind_symbol_all();

        return swrap.libc.symbols._libc_recvmmsg.f(sockfd,
                                                   msgvec,
                                                   vlen,
                                                   flags,
                                                   timeout);
}
#endif
1178
1179 static int libc_send(int sockfd, const void *buf, size_t len, int flags)
1180 {
1181         swrap_bind_symbol_all();
1182
1183         return swrap.libc.symbols._libc_send.f(sockfd, buf, len, flags);
1184 }
1185
1186 static int libc_sendmsg(int sockfd, const struct msghdr *msg, int flags)
1187 {
1188         swrap_bind_symbol_all();
1189
1190         return swrap.libc.symbols._libc_sendmsg.f(sockfd, msg, flags);
1191 }
1192
#ifdef HAVE_SENDMMSG
/*
 * Forward to the bound sendmmsg() symbol after binding all symbols.
 *
 * The prototype differs between platforms, so the signature is selected by
 * HAVE_SENDMMSG_SSIZE_T; the body is shared.
 */
#if defined(HAVE_SENDMMSG_SSIZE_T)
/* FreeBSD */
static ssize_t libc_sendmmsg(int sockfd,
                             struct mmsghdr *msgvec,
                             size_t vlen,
                             int flags)
#else
/* Linux */
static int libc_sendmmsg(int sockfd,
                         struct mmsghdr *msgvec,
                         unsigned int vlen,
                         int flags)
#endif
{
        swrap_bind_symbol_all();

        return swrap.libc.symbols._libc_sendmmsg.f(sockfd,
                                                   msgvec,
                                                   vlen,
                                                   flags);
}
#endif
1207
1208 static int libc_sendto(int sockfd,
1209                        const void *buf,
1210                        size_t len,
1211                        int flags,
1212                        const  struct sockaddr *dst_addr,
1213                        socklen_t addrlen)
1214 {
1215         swrap_bind_symbol_all();
1216
1217         return swrap.libc.symbols._libc_sendto.f(sockfd,
1218                                                  buf,
1219                                                  len,
1220                                                  flags,
1221                                                  dst_addr,
1222                                                  addrlen);
1223 }
1224
1225 static int libc_setsockopt(int sockfd,
1226                            int level,
1227                            int optname,
1228                            const void *optval,
1229                            socklen_t optlen)
1230 {
1231         swrap_bind_symbol_all();
1232
1233         return swrap.libc.symbols._libc_setsockopt.f(sockfd,
1234                                                      level,
1235                                                      optname,
1236                                                      optval,
1237                                                      optlen);
1238 }
1239
1240 #ifdef HAVE_SIGNALFD
1241 static int libc_signalfd(int fd, const sigset_t *mask, int flags)
1242 {
1243         swrap_bind_symbol_all();
1244
1245         return swrap.libc.symbols._libc_signalfd.f(fd, mask, flags);
1246 }
1247 #endif
1248
1249 static int libc_socket(int domain, int type, int protocol)
1250 {
1251         swrap_bind_symbol_all();
1252
1253         return swrap.libc.symbols._libc_socket.f(domain, type, protocol);
1254 }
1255
1256 static int libc_socketpair(int domain, int type, int protocol, int sv[2])
1257 {
1258         swrap_bind_symbol_all();
1259
1260         return swrap.libc.symbols._libc_socketpair.f(domain, type, protocol, sv);
1261 }
1262
1263 #ifdef HAVE_TIMERFD_CREATE
1264 static int libc_timerfd_create(int clockid, int flags)
1265 {
1266         swrap_bind_symbol_all();
1267
1268         return swrap.libc.symbols._libc_timerfd_create.f(clockid, flags);
1269 }
1270 #endif
1271
1272 static ssize_t libc_write(int fd, const void *buf, size_t count)
1273 {
1274         swrap_bind_symbol_all();
1275
1276         return swrap.libc.symbols._libc_write.f(fd, buf, count);
1277 }
1278
1279 static ssize_t libc_writev(int fd, const struct iovec *iov, int iovcnt)
1280 {
1281         swrap_bind_symbol_all();
1282
1283         return swrap.libc.symbols._libc_writev.f(fd, iov, iovcnt);
1284 }
1285
/*
 * Bind every symbol that the libc_*() forwarders in this file dereference.
 *
 * Each entry goes through either swrap_bind_symbol_libc() or
 * swrap_bind_symbol_libsocket(); optional symbols are guarded by the same
 * HAVE_* macros as their forwarders.  This is the callback handed to
 * pthread_once() by swrap_bind_symbol_all().
 *
 * DO NOT call this function during library initialization!
 */
static void __swrap_bind_symbol_all_once(void)
{
#ifdef HAVE_ACCEPT4
        swrap_bind_symbol_libsocket(accept4);
#else
        swrap_bind_symbol_libsocket(accept);
#endif
        swrap_bind_symbol_libsocket(bind);
        swrap_bind_symbol_libc(close);
#ifdef HAVE___CLOSE_NOCANCEL
        swrap_bind_symbol_libc(__close_nocancel);
#endif
        swrap_bind_symbol_libsocket(connect);
        swrap_bind_symbol_libc(dup);
        swrap_bind_symbol_libc(dup2);
        swrap_bind_symbol_libc(fcntl);
        swrap_bind_symbol_libc(fopen);
#ifdef HAVE_FOPEN64
        swrap_bind_symbol_libc(fopen64);
#endif
#ifdef HAVE_EVENTFD
        swrap_bind_symbol_libc(eventfd);
#endif
        swrap_bind_symbol_libsocket(getpeername);
        swrap_bind_symbol_libsocket(getsockname);
        swrap_bind_symbol_libsocket(getsockopt);
        swrap_bind_symbol_libc(ioctl);
        swrap_bind_symbol_libsocket(listen);
        swrap_bind_symbol_libc(open);
#ifdef HAVE_OPEN64
        swrap_bind_symbol_libc(open64);
#endif
        swrap_bind_symbol_libc(openat);
        swrap_bind_symbol_libsocket(pipe);
        swrap_bind_symbol_libc(read);
        swrap_bind_symbol_libsocket(readv);
        swrap_bind_symbol_libsocket(recv);
        swrap_bind_symbol_libsocket(recvfrom);
        swrap_bind_symbol_libsocket(recvmsg);
#ifdef HAVE_RECVMMSG
        swrap_bind_symbol_libsocket(recvmmsg);
#endif
        swrap_bind_symbol_libsocket(send);
        swrap_bind_symbol_libsocket(sendmsg);
#ifdef HAVE_SENDMMSG
        swrap_bind_symbol_libsocket(sendmmsg);
#endif
        swrap_bind_symbol_libsocket(sendto);
        swrap_bind_symbol_libsocket(setsockopt);
#ifdef HAVE_SIGNALFD
        swrap_bind_symbol_libsocket(signalfd);
#endif
        swrap_bind_symbol_libsocket(socket);
        swrap_bind_symbol_libsocket(socketpair);
#ifdef HAVE_TIMERFD_CREATE
        swrap_bind_symbol_libc(timerfd_create);
#endif
        swrap_bind_symbol_libc(write);
        swrap_bind_symbol_libsocket(writev);
}
1347
1348 static void swrap_bind_symbol_all(void)
1349 {
1350         static pthread_once_t all_symbol_binding_once = PTHREAD_ONCE_INIT;
1351
1352         pthread_once(&all_symbol_binding_once, __swrap_bind_symbol_all_once);
1353 }
1354
1355 /*********************************************************
1356  * SWRAP HELPER FUNCTIONS
1357  *********************************************************/
1358
1359 /*
1360  * We return 127.0.0.0 (default) or 10.53.57.0.
1361  *
1362  * This can be controlled by:
1363  * SOCKET_WRAPPER_IPV4_NETWORK=127.0.0.0 (default)
1364  * or
1365  * SOCKET_WRAPPER_IPV4_NETWORK=10.53.57.0
1366  */
1367 static in_addr_t swrap_ipv4_net(void)
1368 {
1369         static int initialized;
1370         static in_addr_t hv;
1371         const char *net_str = NULL;
1372         struct in_addr nv;
1373         int ret;
1374
1375         if (initialized) {
1376                 return hv;
1377         }
1378         initialized = 1;
1379
1380         net_str = getenv("SOCKET_WRAPPER_IPV4_NETWORK");
1381         if (net_str == NULL) {
1382                 net_str = "127.0.0.0";
1383         }
1384
1385         ret = inet_pton(AF_INET, net_str, &nv);
1386         if (ret <= 0) {
1387                 SWRAP_LOG(SWRAP_LOG_ERROR,
1388                           "INVALID IPv4 Network [%s]",
1389                           net_str);
1390                 abort();
1391         }
1392
1393         hv = ntohl(nv.s_addr);
1394
1395         switch (hv) {
1396         case 0x7f000000:
1397                 /* 127.0.0.0 */
1398                 break;
1399         case 0x0a353900:
1400                 /* 10.53.57.0 */
1401                 break;
1402         default:
1403                 SWRAP_LOG(SWRAP_LOG_ERROR,
1404                           "INVALID IPv4 Network [%s][0x%x] should be "
1405                           "127.0.0.0 or 10.53.57.0",
1406                           net_str, (unsigned)hv);
1407                 abort();
1408         }
1409
1410         return hv;
1411 }
1412
/*
 * Return the broadcast address of the emulated network in host byte order:
 * 127.255.255.255 or 10.255.255.255.
 */
static in_addr_t swrap_ipv4_bcast(void)
{
        const in_addr_t net = swrap_ipv4_net();

        return net | IN_CLASSA_HOST;
}
1425
1426 /*
1427  * This returns 127.0.0.${iface} or 10.53.57.${iface}
1428  */
1429 static in_addr_t swrap_ipv4_iface(unsigned int iface)
1430 {
1431         in_addr_t hv;
1432
1433         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
1434                 SWRAP_LOG(SWRAP_LOG_ERROR,
1435                           "swrap_ipv4_iface(%u) invalid!",
1436                           iface);
1437                 abort();
1438                 return -1;
1439         }
1440
1441         hv = swrap_ipv4_net();
1442         hv |= iface;
1443
1444         return hv;
1445 }
1446
1447 #ifdef HAVE_IPV6
/*
 * Return a pointer to the emulated IPv6 base address FD00::5357:5F00
 * (interfaces are FD00::5357:5FXX).
 *
 * The address is parsed on the first call and kept in static storage; the
 * process aborts if parsing fails.
 */
static const struct in6_addr *swrap_ipv6(void)
{
        static struct in6_addr base_addr;
        static int base_addr_ready;

        if (!base_addr_ready) {
                base_addr_ready = 1;

                if (inet_pton(AF_INET6, "FD00::5357:5F00", &base_addr) <= 0) {
                        abort();
                }
        }

        return &base_addr;
}
1469 #endif
1470
1471 static void set_port(int family, int prt, struct swrap_address *addr)
1472 {
1473         switch (family) {
1474         case AF_INET:
1475                 addr->sa.in.sin_port = htons(prt);
1476                 break;
1477 #ifdef HAVE_IPV6
1478         case AF_INET6:
1479                 addr->sa.in6.sin6_port = htons(prt);
1480                 break;
1481 #endif
1482         }
1483 }
1484
/*
 * Return the size of the sockaddr structure used for @family, or 0 for a
 * family that is not handled here.
 */
static size_t socket_length(int family)
{
        if (family == AF_INET) {
                return sizeof(struct sockaddr_in);
        }
#ifdef HAVE_IPV6
        if (family == AF_INET6) {
                return sizeof(struct sockaddr_in6);
        }
#endif
        return 0;
}
1497
/* Caller-provided storage for the text produced by swrap_sockaddr_string(). */
struct swrap_sockaddr_buf {
        char str[128];
};

/*
 * Format @saddr as "addr[<address>]/port[<port>]" into @buf.
 *
 * AF_INET (and AF_INET6 when built with HAVE_IPV6) addresses are rendered
 * with inet_ntop(); any other family yields a placeholder address text and
 * port 0.
 *
 * Returns buf->str.
 */
static const char *swrap_sockaddr_string(struct swrap_sockaddr_buf *buf,
                                         const struct sockaddr *saddr)
{
        char addr_text[64] = {0};
        unsigned int port_num = 0;

        if (saddr->sa_family == AF_INET) {
                const struct sockaddr_in *sin =
                    (const struct sockaddr_in *)(const void *)saddr;

                port_num = ntohs(sin->sin_port);

                inet_ntop(saddr->sa_family,
                          &sin->sin_addr,
                          addr_text, sizeof(addr_text));
#ifdef HAVE_IPV6
        } else if (saddr->sa_family == AF_INET6) {
                const struct sockaddr_in6 *sin6 =
                    (const struct sockaddr_in6 *)(const void *)saddr;

                port_num = ntohs(sin6->sin6_port);

                inet_ntop(saddr->sa_family,
                          &sin6->sin6_addr,
                          addr_text, sizeof(addr_text));
#endif
        } else {
                snprintf(addr_text, sizeof(addr_text),
                         "<Unknown address family %u>",
                         saddr->sa_family);
        }

        snprintf(buf->str, sizeof(buf->str),
                 "addr[%s]/port[%u]",
                 addr_text, port_num);

        return buf->str;
}
1546
1547 static struct socket_info *swrap_get_socket_info(int si_index)
1548 {
1549         return (struct socket_info *)(&(sockets[si_index].info));
1550 }
1551
1552 static int swrap_get_refcount(struct socket_info *si)
1553 {
1554         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1555         return sic->meta.refcount;
1556 }
1557
1558 static void swrap_inc_refcount(struct socket_info *si)
1559 {
1560         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1561
1562         sic->meta.refcount += 1;
1563 }
1564
1565 static void swrap_dec_refcount(struct socket_info *si)
1566 {
1567         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1568
1569         sic->meta.refcount -= 1;
1570 }
1571
1572 static int swrap_get_next_free(struct socket_info *si)
1573 {
1574         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1575
1576         return sic->meta.next_free;
1577 }
1578
1579 static void swrap_set_next_free(struct socket_info *si, int next_free)
1580 {
1581         struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si);
1582
1583         sic->meta.next_free = next_free;
1584 }
1585
1586 static int swrap_un_path(struct sockaddr_un *un,
1587                          const char *swrap_dir,
1588                          char type,
1589                          unsigned int iface,
1590                          unsigned int prt)
1591 {
1592         int ret;
1593
1594         ret = snprintf(un->sun_path,
1595                        sizeof(un->sun_path),
1596                        "%s/"SOCKET_FORMAT,
1597                        swrap_dir,
1598                        type,
1599                        iface,
1600                        prt);
1601         if ((size_t)ret >= sizeof(un->sun_path)) {
1602                 return ENAMETOOLONG;
1603         }
1604
1605         return 0;
1606 }
1607
/*
 * Write "<swrap_dir>/EINVAL" into un->sun_path.
 *
 * Returns 0 on success or ENAMETOOLONG when the result does not fit.
 */
static int swrap_un_path_EINVAL(struct sockaddr_un *un,
                                const char *swrap_dir)
{
        const size_t path_size = sizeof(un->sun_path);
        int len;

        len = snprintf(un->sun_path, path_size, "%s/EINVAL", swrap_dir);

        /* A negative result becomes a huge size_t and is reported as too long. */
        return ((size_t)len < path_size) ? 0 : ENAMETOOLONG;
}
1624
1625 static bool swrap_dir_usable(const char *swrap_dir)
1626 {
1627         struct sockaddr_un un;
1628         int ret;
1629
1630         ret = swrap_un_path(&un, swrap_dir, SOCKET_TYPE_CHAR_TCP, 0, 0);
1631         if (ret == 0) {
1632                 return true;
1633         }
1634
1635         ret = swrap_un_path_EINVAL(&un, swrap_dir);
1636         if (ret == 0) {
1637                 return true;
1638         }
1639
1640         return false;
1641 }
1642
1643 static char *socket_wrapper_dir(void)
1644 {
1645         char *swrap_dir = NULL;
1646         char *s = getenv("SOCKET_WRAPPER_DIR");
1647         char *t;
1648         bool ok;
1649
1650         if (s == NULL || s[0] == '\0') {
1651                 SWRAP_LOG(SWRAP_LOG_WARN, "SOCKET_WRAPPER_DIR not set");
1652                 return NULL;
1653         }
1654
1655         swrap_dir = realpath(s, NULL);
1656         if (swrap_dir == NULL) {
1657                 SWRAP_LOG(SWRAP_LOG_ERROR,
1658                           "Unable to resolve socket_wrapper dir path: %s - %s",
1659                           s,
1660                           strerror(errno));
1661                 abort();
1662         }
1663
1664         ok = swrap_dir_usable(swrap_dir);
1665         if (ok) {
1666                 goto done;
1667         }
1668
1669         free(swrap_dir);
1670
1671         ok = swrap_dir_usable(s);
1672         if (!ok) {
1673                 SWRAP_LOG(SWRAP_LOG_ERROR, "SOCKET_WRAPPER_DIR is too long");
1674                 abort();
1675         }
1676
1677         t = getenv("SOCKET_WRAPPER_DIR_ALLOW_ORIG");
1678         if (t == NULL) {
1679                 SWRAP_LOG(SWRAP_LOG_ERROR,
1680                           "realpath(SOCKET_WRAPPER_DIR) too long and "
1681                           "SOCKET_WRAPPER_DIR_ALLOW_ORIG not set");
1682                 abort();
1683
1684         }
1685
1686         swrap_dir = strdup(s);
1687         if (swrap_dir == NULL) {
1688                 SWRAP_LOG(SWRAP_LOG_ERROR,
1689                           "Unable to duplicate socket_wrapper dir path");
1690                 abort();
1691         }
1692
1693         SWRAP_LOG(SWRAP_LOG_WARN,
1694                   "realpath(SOCKET_WRAPPER_DIR) too long, "
1695                   "using original SOCKET_WRAPPER_DIR\n");
1696
1697 done:
1698         SWRAP_LOG(SWRAP_LOG_TRACE, "socket_wrapper_dir: %s", swrap_dir);
1699         return swrap_dir;
1700 }
1701
1702 static unsigned int socket_wrapper_mtu(void)
1703 {
1704         static unsigned int max_mtu = 0;
1705         unsigned int tmp;
1706         const char *s;
1707         char *endp;
1708
1709         swrap_mutex_lock(&mtu_update_mutex);
1710
1711         if (max_mtu != 0) {
1712                 goto done;
1713         }
1714
1715         max_mtu = SOCKET_WRAPPER_MTU_DEFAULT;
1716
1717         s = getenv("SOCKET_WRAPPER_MTU");
1718         if (s == NULL) {
1719                 goto done;
1720         }
1721
1722         tmp = strtol(s, &endp, 10);
1723         if (s == endp) {
1724                 goto done;
1725         }
1726
1727         if (tmp < SOCKET_WRAPPER_MTU_MIN || tmp > SOCKET_WRAPPER_MTU_MAX) {
1728                 goto done;
1729         }
1730         max_mtu = tmp;
1731
1732 done:
1733         swrap_mutex_unlock(&mtu_update_mutex);
1734         return max_mtu;
1735 }
1736
/*
 * Initialise *m as a PTHREAD_MUTEX_ERRORCHECK mutex.
 *
 * @m:    mutex to initialise
 * @name: label used in the error log when a step fails
 *
 * Returns 0 on success, otherwise the non-zero return value of the first
 * pthread call that failed (which is also logged).
 */
static int _socket_wrapper_init_mutex(pthread_mutex_t *m, const char *name)
{
        pthread_mutexattr_t ma;
        bool need_destroy = false;
        int ret = 0;

/*
 * Run one pthread call; on failure log it, using the stringified call text,
 * and jump to the cleanup label with ret holding the error.
 * Note: the macro is not #undef'd after the function.
 */
#define __CHECK(cmd) do { \
        ret = cmd; \
        if (ret != 0) { \
                SWRAP_LOG(SWRAP_LOG_ERROR, \
                          "%s: %s - failed %d", \
                          name, #cmd, ret); \
                goto done; \
        } \
} while(0)

        *m = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
        __CHECK(pthread_mutexattr_init(&ma));
        /* From here on the attribute object must be destroyed on exit. */
        need_destroy = true;
        __CHECK(pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK));
        __CHECK(pthread_mutex_init(m, &ma));
done:
        if (need_destroy) {
                pthread_mutexattr_destroy(&ma);
        }
        return ret;
}
1764
1765 static size_t socket_wrapper_max_sockets(void)
1766 {
1767         const char *s;
1768         size_t tmp;
1769         char *endp;
1770
1771         if (socket_info_max != 0) {
1772                 return socket_info_max;
1773         }
1774
1775         socket_info_max = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1776
1777         s = getenv("SOCKET_WRAPPER_MAX_SOCKETS");
1778         if (s == NULL || s[0] == '\0') {
1779                 goto done;
1780         }
1781
1782         tmp = strtoul(s, &endp, 10);
1783         if (s == endp) {
1784                 goto done;
1785         }
1786         if (tmp == 0) {
1787                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_DEFAULT;
1788                 SWRAP_LOG(SWRAP_LOG_ERROR,
1789                           "Invalid number of sockets specified, "
1790                           "using default (%zu)",
1791                           tmp);
1792         }
1793
1794         if (tmp > SOCKET_WRAPPER_MAX_SOCKETS_LIMIT) {
1795                 tmp = SOCKET_WRAPPER_MAX_SOCKETS_LIMIT;
1796                 SWRAP_LOG(SWRAP_LOG_ERROR,
1797                           "Invalid number of sockets specified, "
1798                           "using maximum (%zu).",
1799                           tmp);
1800         }
1801
1802         socket_info_max = tmp;
1803
1804 done:
1805         return socket_info_max;
1806 }
1807
1808 static void socket_wrapper_init_fds_idx(void)
1809 {
1810         int *tmp = NULL;
1811         size_t i;
1812
1813         if (socket_fds_idx != NULL) {
1814                 return;
1815         }
1816
1817         tmp = (int *)calloc(socket_fds_max, sizeof(int));
1818         if (tmp == NULL) {
1819                 SWRAP_LOG(SWRAP_LOG_ERROR,
1820                           "Failed to allocate socket fds index array: %s",
1821                           strerror(errno));
1822                 exit(-1);
1823         }
1824
1825         for (i = 0; i < socket_fds_max; i++) {
1826                 tmp[i] = -1;
1827         }
1828
1829         socket_fds_idx = tmp;
1830 }
1831
1832 static void socket_wrapper_init_sockets(void)
1833 {
1834         size_t max_sockets;
1835         size_t i;
1836         int ret = 0;
1837
1838         swrap_bind_symbol_all();
1839
1840         swrap_mutex_lock(&sockets_mutex);
1841
1842         if (sockets != NULL) {
1843                 swrap_mutex_unlock(&sockets_mutex);
1844                 return;
1845         }
1846
1847         SWRAP_LOG(SWRAP_LOG_DEBUG,
1848                   "SOCKET_WRAPPER_PACKAGE[%s] SOCKET_WRAPPER_VERSION[%s]",
1849                   SOCKET_WRAPPER_PACKAGE, SOCKET_WRAPPER_VERSION);
1850
1851         /*
1852          * Intialize the static cache early before
1853          * any thread is able to start.
1854          */
1855         (void)swrap_ipv4_net();
1856
1857         socket_wrapper_init_fds_idx();
1858
1859         /* Needs to be called inside the sockets_mutex lock here. */
1860         max_sockets = socket_wrapper_max_sockets();
1861
1862         sockets = (struct socket_info_container *)calloc(max_sockets,
1863                                         sizeof(struct socket_info_container));
1864
1865         if (sockets == NULL) {
1866                 SWRAP_LOG(SWRAP_LOG_ERROR,
1867                           "Failed to allocate sockets array: %s",
1868                           strerror(errno));
1869                 swrap_mutex_unlock(&sockets_mutex);
1870                 exit(-1);
1871         }
1872
1873         swrap_mutex_lock(&first_free_mutex);
1874         swrap_mutex_lock(&sockets_si_global);
1875
1876         first_free = 0;
1877
1878         for (i = 0; i < max_sockets; i++) {
1879                 swrap_set_next_free(&sockets[i].info, i+1);
1880         }
1881
1882         /* mark the end of the free list */
1883         swrap_set_next_free(&sockets[max_sockets-1].info, -1);
1884
1885         swrap_mutex_unlock(&sockets_si_global);
1886         swrap_mutex_unlock(&first_free_mutex);
1887         swrap_mutex_unlock(&sockets_mutex);
1888         if (ret != 0) {
1889                 exit(-1);
1890         }
1891 }
1892
/*
 * Report whether socket wrapping is active, i.e. whether
 * socket_wrapper_dir() yields a directory.  When it does, the socket
 * bookkeeping is initialised before returning true.
 */
bool socket_wrapper_enabled(void)
{
        char *dir = socket_wrapper_dir();

        if (dir == NULL) {
                return false;
        }

        /* Only the presence of the directory matters, not its value. */
        SAFE_FREE(dir);

        socket_wrapper_init_sockets();

        return true;
}
1907
1908 static unsigned int socket_wrapper_default_iface(void)
1909 {
1910         const char *s = getenv("SOCKET_WRAPPER_DEFAULT_IFACE");
1911         if (s) {
1912                 unsigned int iface;
1913                 if (sscanf(s, "%u", &iface) == 1) {
1914                         if (iface >= 1 && iface <= MAX_WRAPPED_INTERFACES) {
1915                                 return iface;
1916                         }
1917                 }
1918         }
1919
1920         return 1;/* 127.0.0.1 */
1921 }
1922
1923 static void set_socket_info_index(int fd, int idx)
1924 {
1925         SWRAP_LOG(SWRAP_LOG_TRACE,
1926                   "fd=%d idx=%d",
1927                   fd, idx);
1928         socket_fds_idx[fd] = idx;
1929         /* This builtin issues a full memory barrier. */
1930         __sync_synchronize();
1931 }
1932
1933 static void reset_socket_info_index(int fd)
1934 {
1935         SWRAP_LOG(SWRAP_LOG_TRACE,
1936                   "fd=%d idx=%d",
1937                   fd, -1);
1938         set_socket_info_index(fd, -1);
1939 }
1940
1941 static int find_socket_info_index(int fd)
1942 {
1943         if (fd < 0) {
1944                 return -1;
1945         }
1946
1947         if (socket_fds_idx == NULL) {
1948                 return -1;
1949         }
1950
1951         if ((size_t)fd >= socket_fds_max) {
1952                 /*
1953                  * Do not add a log here as some applications do stupid things
1954                  * like:
1955                  *
1956                  *     for (fd = 0; fd <= getdtablesize(); fd++) {
1957                  *         close(fd)
1958                  *     };
1959                  *
1960                  * This would produce millions of lines of debug messages.
1961                  */
1962 #if 0
1963                 SWRAP_LOG(SWRAP_LOG_ERROR,
1964                           "Looking for a socket info for the fd %d is over the "
1965                           "max socket index limit of %zu.",
1966                           fd,
1967                           socket_fds_max);
1968 #endif
1969                 return -1;
1970         }
1971
1972         /* This builtin issues a full memory barrier. */
1973         __sync_synchronize();
1974         return socket_fds_idx[fd];
1975 }
1976
1977 static int swrap_add_socket_info(const struct socket_info *si_input)
1978 {
1979         struct socket_info *si = NULL;
1980         int si_index = -1;
1981
1982         if (si_input == NULL) {
1983                 errno = EINVAL;
1984                 return -1;
1985         }
1986
1987         swrap_mutex_lock(&first_free_mutex);
1988         if (first_free == -1) {
1989                 errno = ENFILE;
1990                 goto out;
1991         }
1992
1993         si_index = first_free;
1994         si = swrap_get_socket_info(si_index);
1995
1996         SWRAP_LOCK_SI(si);
1997
1998         first_free = swrap_get_next_free(si);
1999         *si = *si_input;
2000         swrap_inc_refcount(si);
2001
2002         SWRAP_UNLOCK_SI(si);
2003
2004 out:
2005         swrap_mutex_unlock(&first_free_mutex);
2006
2007         return si_index;
2008 }
2009
2010 static int swrap_create_socket(struct socket_info *si, int fd)
2011 {
2012         int idx;
2013
2014         if ((size_t)fd >= socket_fds_max) {
2015                 SWRAP_LOG(SWRAP_LOG_ERROR,
2016                           "The max socket index limit of %zu has been reached, "
2017                           "trying to add %d",
2018                           socket_fds_max,
2019                           fd);
2020                 errno = EMFILE;
2021                 return -1;
2022         }
2023
2024         idx = swrap_add_socket_info(si);
2025         if (idx == -1) {
2026                 return -1;
2027         }
2028
2029         set_socket_info_index(fd, idx);
2030
2031         return idx;
2032 }
2033
2034 static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, socklen_t *len)
2035 {
2036         unsigned int iface;
2037         unsigned int prt;
2038         const char *p;
2039         char type;
2040
2041         p = strrchr(un->sun_path, '/');
2042         if (p) p++; else p = un->sun_path;
2043
2044         if (sscanf(p, SOCKET_FORMAT, &type, &iface, &prt) != 3) {
2045                 SWRAP_LOG(SWRAP_LOG_ERROR, "sun_path[%s] p[%s]",
2046                           un->sun_path, p);
2047                 errno = EINVAL;
2048                 return -1;
2049         }
2050
2051         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
2052                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
2053                           type, iface, prt);
2054                 errno = EINVAL;
2055                 return -1;
2056         }
2057
2058         if (prt > 0xFFFF) {
2059                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
2060                           type, iface, prt);
2061                 errno = EINVAL;
2062                 return -1;
2063         }
2064
2065         SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u",
2066                   type, iface, prt);
2067
2068         switch(type) {
2069         case SOCKET_TYPE_CHAR_TCP:
2070         case SOCKET_TYPE_CHAR_UDP: {
2071                 struct sockaddr_in *in2 = (struct sockaddr_in *)(void *)in;
2072
2073                 if ((*len) < sizeof(*in2)) {
2074                         SWRAP_LOG(SWRAP_LOG_ERROR,
2075                                   "V4: *len(%zu) < sizeof(*in2)=%zu",
2076                                   (size_t)*len, sizeof(*in2));
2077                         errno = EINVAL;
2078                         return -1;
2079                 }
2080
2081                 memset(in2, 0, sizeof(*in2));
2082                 in2->sin_family = AF_INET;
2083                 in2->sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
2084                 in2->sin_port = htons(prt);
2085
2086                 *len = sizeof(*in2);
2087                 break;
2088         }
2089 #ifdef HAVE_IPV6
2090         case SOCKET_TYPE_CHAR_TCP_V6:
2091         case SOCKET_TYPE_CHAR_UDP_V6: {
2092                 struct sockaddr_in6 *in2 = (struct sockaddr_in6 *)(void *)in;
2093
2094                 if ((*len) < sizeof(*in2)) {
2095                         SWRAP_LOG(SWRAP_LOG_ERROR,
2096                                   "V6: *len(%zu) < sizeof(*in2)=%zu",
2097                                   (size_t)*len, sizeof(*in2));
2098                         SWRAP_LOG(SWRAP_LOG_ERROR, "LINE:%d", __LINE__);
2099                         errno = EINVAL;
2100                         return -1;
2101                 }
2102
2103                 memset(in2, 0, sizeof(*in2));
2104                 in2->sin6_family = AF_INET6;
2105                 in2->sin6_addr = *swrap_ipv6();
2106                 in2->sin6_addr.s6_addr[15] = iface;
2107                 in2->sin6_port = htons(prt);
2108
2109                 *len = sizeof(*in2);
2110                 break;
2111         }
2112 #endif
2113         default:
2114                 SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u",
2115                           type, iface, prt);
2116                 errno = EINVAL;
2117                 return -1;
2118         }
2119
2120         return 0;
2121 }
2122
2123 static int convert_in_un_remote(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
2124                                 int *bcast)
2125 {
2126         char type = '\0';
2127         unsigned int prt;
2128         unsigned int iface;
2129         int is_bcast = 0;
2130         char *swrap_dir = NULL;
2131
2132         if (bcast) *bcast = 0;
2133
2134         switch (inaddr->sa_family) {
2135         case AF_INET: {
2136                 const struct sockaddr_in *in =
2137                     (const struct sockaddr_in *)(const void *)inaddr;
2138                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2139                 char u_type = '\0';
2140                 char b_type = '\0';
2141                 char a_type = '\0';
2142                 const unsigned int sw_net_addr = swrap_ipv4_net();
2143                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2144
2145                 switch (si->type) {
2146                 case SOCK_STREAM:
2147                         u_type = SOCKET_TYPE_CHAR_TCP;
2148                         break;
2149                 case SOCK_DGRAM:
2150                         u_type = SOCKET_TYPE_CHAR_UDP;
2151                         a_type = SOCKET_TYPE_CHAR_UDP;
2152                         b_type = SOCKET_TYPE_CHAR_UDP;
2153                         break;
2154                 default:
2155                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2156                         errno = ESOCKTNOSUPPORT;
2157                         return -1;
2158                 }
2159
2160                 prt = ntohs(in->sin_port);
2161                 if (a_type && addr == 0xFFFFFFFF) {
2162                         /* 255.255.255.255 only udp */
2163                         is_bcast = 2;
2164                         type = a_type;
2165                         iface = socket_wrapper_default_iface();
2166                 } else if (b_type && addr == sw_bcast_addr) {
2167                         /*
2168                          * 127.255.255.255
2169                          * or
2170                          * 10.255.255.255
2171                          * only udp
2172                          */
2173                         is_bcast = 1;
2174                         type = b_type;
2175                         iface = socket_wrapper_default_iface();
2176                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2177                         /* 127.0.0.X or 10.53.57.X */
2178                         is_bcast = 0;
2179                         type = u_type;
2180                         iface = (addr & 0x000000FF);
2181                 } else {
2182                         struct swrap_sockaddr_buf buf = {};
2183                         SWRAP_LOG(SWRAP_LOG_WARN,
2184                                   "%s",
2185                                   swrap_sockaddr_string(&buf, inaddr));
2186                         errno = ENETUNREACH;
2187                         return -1;
2188                 }
2189                 if (bcast) *bcast = is_bcast;
2190                 break;
2191         }
2192 #ifdef HAVE_IPV6
2193         case AF_INET6: {
2194                 const struct sockaddr_in6 *in =
2195                     (const struct sockaddr_in6 *)(const void *)inaddr;
2196                 struct in6_addr cmp1, cmp2;
2197
2198                 switch (si->type) {
2199                 case SOCK_STREAM:
2200                         type = SOCKET_TYPE_CHAR_TCP_V6;
2201                         break;
2202                 case SOCK_DGRAM:
2203                         type = SOCKET_TYPE_CHAR_UDP_V6;
2204                         break;
2205                 default:
2206                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2207                         errno = ESOCKTNOSUPPORT;
2208                         return -1;
2209                 }
2210
2211                 /* XXX no multicast/broadcast */
2212
2213                 prt = ntohs(in->sin6_port);
2214
2215                 cmp1 = *swrap_ipv6();
2216                 cmp2 = in->sin6_addr;
2217                 cmp2.s6_addr[15] = 0;
2218                 if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2219                         iface = in->sin6_addr.s6_addr[15];
2220                 } else {
2221                         struct swrap_sockaddr_buf buf = {};
2222                         SWRAP_LOG(SWRAP_LOG_WARN,
2223                                   "%s",
2224                                   swrap_sockaddr_string(&buf, inaddr));
2225                         errno = ENETUNREACH;
2226                         return -1;
2227                 }
2228
2229                 break;
2230         }
2231 #endif
2232         default:
2233                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family!");
2234                 errno = ENETUNREACH;
2235                 return -1;
2236         }
2237
2238         if (prt == 0) {
2239                 SWRAP_LOG(SWRAP_LOG_WARN, "Port not set");
2240                 errno = EINVAL;
2241                 return -1;
2242         }
2243
2244         swrap_dir = socket_wrapper_dir();
2245         if (swrap_dir == NULL) {
2246                 errno = EINVAL;
2247                 return -1;
2248         }
2249
2250         if (is_bcast) {
2251                 swrap_un_path_EINVAL(un, swrap_dir);
2252                 SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2253                 SAFE_FREE(swrap_dir);
2254                 /* the caller need to do more processing */
2255                 return 0;
2256         }
2257
2258         swrap_un_path(un, swrap_dir, type, iface, prt);
2259         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2260
2261         SAFE_FREE(swrap_dir);
2262
2263         return 0;
2264 }
2265
2266 static int convert_in_un_alloc(struct socket_info *si, const struct sockaddr *inaddr, struct sockaddr_un *un,
2267                                int *bcast)
2268 {
2269         char type = '\0';
2270         unsigned int prt;
2271         unsigned int iface;
2272         struct stat st;
2273         int is_bcast = 0;
2274         char *swrap_dir = NULL;
2275
2276         if (bcast) *bcast = 0;
2277
2278         switch (si->family) {
2279         case AF_INET: {
2280                 const struct sockaddr_in *in =
2281                     (const struct sockaddr_in *)(const void *)inaddr;
2282                 unsigned int addr = ntohl(in->sin_addr.s_addr);
2283                 char u_type = '\0';
2284                 char d_type = '\0';
2285                 char b_type = '\0';
2286                 char a_type = '\0';
2287                 const unsigned int sw_net_addr = swrap_ipv4_net();
2288                 const unsigned int sw_bcast_addr = swrap_ipv4_bcast();
2289
2290                 prt = ntohs(in->sin_port);
2291
2292                 switch (si->type) {
2293                 case SOCK_STREAM:
2294                         u_type = SOCKET_TYPE_CHAR_TCP;
2295                         d_type = SOCKET_TYPE_CHAR_TCP;
2296                         break;
2297                 case SOCK_DGRAM:
2298                         u_type = SOCKET_TYPE_CHAR_UDP;
2299                         d_type = SOCKET_TYPE_CHAR_UDP;
2300                         a_type = SOCKET_TYPE_CHAR_UDP;
2301                         b_type = SOCKET_TYPE_CHAR_UDP;
2302                         break;
2303                 default:
2304                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2305                         errno = ESOCKTNOSUPPORT;
2306                         return -1;
2307                 }
2308
2309                 if (addr == 0) {
2310                         /* 0.0.0.0 */
2311                         is_bcast = 0;
2312                         type = d_type;
2313                         iface = socket_wrapper_default_iface();
2314                 } else if (a_type && addr == 0xFFFFFFFF) {
2315                         /* 255.255.255.255 only udp */
2316                         is_bcast = 2;
2317                         type = a_type;
2318                         iface = socket_wrapper_default_iface();
2319                 } else if (b_type && addr == sw_bcast_addr) {
2320                         /* 127.255.255.255 only udp */
2321                         is_bcast = 1;
2322                         type = b_type;
2323                         iface = socket_wrapper_default_iface();
2324                 } else if ((addr & 0xFFFFFF00) == sw_net_addr) {
2325                         /* 127.0.0.X */
2326                         is_bcast = 0;
2327                         type = u_type;
2328                         iface = (addr & 0x000000FF);
2329                 } else {
2330                         errno = EADDRNOTAVAIL;
2331                         return -1;
2332                 }
2333
2334                 /* Store the bind address for connect() */
2335                 if (si->bindname.sa_socklen == 0) {
2336                         struct sockaddr_in bind_in;
2337                         socklen_t blen = sizeof(struct sockaddr_in);
2338
2339                         ZERO_STRUCT(bind_in);
2340                         bind_in.sin_family = in->sin_family;
2341                         bind_in.sin_port = in->sin_port;
2342                         bind_in.sin_addr.s_addr = htonl(swrap_ipv4_iface(iface));
2343                         si->bindname.sa_socklen = blen;
2344                         memcpy(&si->bindname.sa.in, &bind_in, blen);
2345                 }
2346
2347                 break;
2348         }
2349 #ifdef HAVE_IPV6
2350         case AF_INET6: {
2351                 const struct sockaddr_in6 *in =
2352                     (const struct sockaddr_in6 *)(const void *)inaddr;
2353                 struct in6_addr cmp1, cmp2;
2354
2355                 switch (si->type) {
2356                 case SOCK_STREAM:
2357                         type = SOCKET_TYPE_CHAR_TCP_V6;
2358                         break;
2359                 case SOCK_DGRAM:
2360                         type = SOCKET_TYPE_CHAR_UDP_V6;
2361                         break;
2362                 default:
2363                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2364                         errno = ESOCKTNOSUPPORT;
2365                         return -1;
2366                 }
2367
2368                 /* XXX no multicast/broadcast */
2369
2370                 prt = ntohs(in->sin6_port);
2371
2372                 cmp1 = *swrap_ipv6();
2373                 cmp2 = in->sin6_addr;
2374                 cmp2.s6_addr[15] = 0;
2375                 if (IN6_IS_ADDR_UNSPECIFIED(&in->sin6_addr)) {
2376                         iface = socket_wrapper_default_iface();
2377                 } else if (IN6_ARE_ADDR_EQUAL(&cmp1, &cmp2)) {
2378                         iface = in->sin6_addr.s6_addr[15];
2379                 } else {
2380                         errno = EADDRNOTAVAIL;
2381                         return -1;
2382                 }
2383
2384                 /* Store the bind address for connect() */
2385                 if (si->bindname.sa_socklen == 0) {
2386                         struct sockaddr_in6 bind_in;
2387                         socklen_t blen = sizeof(struct sockaddr_in6);
2388
2389                         ZERO_STRUCT(bind_in);
2390                         bind_in.sin6_family = in->sin6_family;
2391                         bind_in.sin6_port = in->sin6_port;
2392
2393                         bind_in.sin6_addr = *swrap_ipv6();
2394                         bind_in.sin6_addr.s6_addr[15] = iface;
2395
2396                         memcpy(&si->bindname.sa.in6, &bind_in, blen);
2397                         si->bindname.sa_socklen = blen;
2398                 }
2399
2400                 break;
2401         }
2402 #endif
2403         default:
2404                 SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2405                 errno = EADDRNOTAVAIL;
2406                 return -1;
2407         }
2408
2409
2410         if (bcast) *bcast = is_bcast;
2411
2412         if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) {
2413                 errno = EINVAL;
2414                 return -1;
2415         }
2416
2417         swrap_dir = socket_wrapper_dir();
2418         if (swrap_dir == NULL) {
2419                 errno = EINVAL;
2420                 return -1;
2421         }
2422
2423         if (prt == 0) {
2424                 /* handle auto-allocation of ephemeral ports */
2425                 for (prt = 5001; prt < 10000; prt++) {
2426                         swrap_un_path(un, swrap_dir, type, iface, prt);
2427                         if (stat(un->sun_path, &st) == 0) continue;
2428
2429                         set_port(si->family, prt, &si->myname);
2430                         set_port(si->family, prt, &si->bindname);
2431
2432                         break;
2433                 }
2434
2435                 if (prt == 10000) {
2436                         errno = ENFILE;
2437                         SAFE_FREE(swrap_dir);
2438                         return -1;
2439                 }
2440         }
2441
2442         swrap_un_path(un, swrap_dir, type, iface, prt);
2443         SWRAP_LOG(SWRAP_LOG_DEBUG, "un path [%s]", un->sun_path);
2444
2445         SAFE_FREE(swrap_dir);
2446
2447         return 0;
2448 }
2449
/*
 * Return the socket_info registered for file descriptor 'fd', or NULL
 * when find_socket_info_index() reports no slot for it.
 */
static struct socket_info *find_socket_info(int fd)
{
	const int slot = find_socket_info_index(fd);

	return (slot == -1) ? NULL : swrap_get_socket_info(slot);
}
2460
#if 0 /* FIXME */
/*
 * Disabled code (compiled out by the surrounding "#if 0").
 *
 * Intended to report whether some wrapped socket already uses the
 * address/port given in 'sa'. It walks a 'socket_fds' list and compares
 * each socket's 'myname' with 'sa'; IPv4 sockets whose own address is
 * INADDR_ANY are skipped.
 *
 * Returns false for unsupported families or a too short 'len'.
 */
static bool check_addr_port_in_use(const struct sockaddr *sa, socklen_t len)
{
	struct socket_info_fd *f;
	const struct socket_info *last_s = NULL;
	size_t min_len;

	/* first catch invalid input */
	switch (sa->sa_family) {
	case AF_INET:
		min_len = sizeof(struct sockaddr_in);
		break;
#ifdef HAVE_IPV6
	case AF_INET6:
		min_len = sizeof(struct sockaddr_in6);
		break;
#endif
	default:
		return false;
	}
	if (len < min_len) {
		return false;
	}

	for (f = socket_fds; f; f = f->next) {
		struct socket_info *s = swrap_get_socket_info(f->si_index);

		/* Skip consecutive entries that share one socket_info. */
		if (s == last_s) {
			continue;
		}
		last_s = s;

		if (s->myname == NULL ||
		    s->myname->sa_family != sa->sa_family) {
			continue;
		}

		if (s->myname->sa_family == AF_INET) {
			const struct sockaddr_in *sin1 =
				(struct sockaddr_in *)s->myname;
			const struct sockaddr_in *sin2 =
				(struct sockaddr_in *)sa;

			if (sin1->sin_addr.s_addr != htonl(INADDR_ANY) &&
			    sin1->sin_port == sin2->sin_port &&
			    sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) {
				/* found */
				return true;
			}
		}
#ifdef HAVE_IPV6
		else if (s->myname->sa_family == AF_INET6) {
			const struct sockaddr_in6 *sin1 =
				(struct sockaddr_in6 *)s->myname;
			const struct sockaddr_in6 *sin2 =
				(struct sockaddr_in6 *)sa;

			if (sin1->sin6_port == sin2->sin6_port &&
			    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
					       &sin2->sin6_addr)) {
				/* found */
				return true;
			}
		}
#endif
	}

	return false;
}
#endif
2552
2553 static void swrap_remove_stale(int fd);
2554
2555 static int sockaddr_convert_to_un(struct socket_info *si,
2556                                   const struct sockaddr *in_addr,
2557                                   socklen_t in_len,
2558                                   struct sockaddr_un *out_addr,
2559                                   int alloc_sock,
2560                                   int *bcast)
2561 {
2562         struct sockaddr *out = (struct sockaddr *)(void *)out_addr;
2563
2564         (void) in_len; /* unused */
2565
2566         if (out_addr == NULL) {
2567                 return 0;
2568         }
2569
2570         out->sa_family = AF_UNIX;
2571 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2572         out->sa_len = sizeof(*out_addr);
2573 #endif
2574
2575         switch (in_addr->sa_family) {
2576         case AF_UNSPEC: {
2577                 const struct sockaddr_in *sin;
2578                 if (si->family != AF_INET) {
2579                         break;
2580                 }
2581                 if (in_len < sizeof(struct sockaddr_in)) {
2582                         break;
2583                 }
2584                 sin = (const struct sockaddr_in *)(const void *)in_addr;
2585                 if(sin->sin_addr.s_addr != htonl(INADDR_ANY)) {
2586                         break;
2587                 }
2588
2589                 /*
2590                  * Note: in the special case of AF_UNSPEC and INADDR_ANY,
2591                  * AF_UNSPEC is mapped to AF_INET and must be treated here.
2592                  */
2593
2594                 FALL_THROUGH;
2595         }
2596         case AF_INET:
2597 #ifdef HAVE_IPV6
2598         case AF_INET6:
2599 #endif
2600                 switch (si->type) {
2601                 case SOCK_STREAM:
2602                 case SOCK_DGRAM:
2603                         break;
2604                 default:
2605                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2606                         errno = ESOCKTNOSUPPORT;
2607                         return -1;
2608                 }
2609                 if (alloc_sock) {
2610                         return convert_in_un_alloc(si, in_addr, out_addr, bcast);
2611                 } else {
2612                         return convert_in_un_remote(si, in_addr, out_addr, bcast);
2613                 }
2614         default:
2615                 break;
2616         }
2617
2618         errno = EAFNOSUPPORT;
2619         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2620         return -1;
2621 }
2622
2623 static int sockaddr_convert_from_un(const struct socket_info *si,
2624                                     const struct sockaddr_un *in_addr,
2625                                     socklen_t un_addrlen,
2626                                     int family,
2627                                     struct sockaddr *out_addr,
2628                                     socklen_t *out_addrlen)
2629 {
2630         int ret;
2631
2632         if (out_addr == NULL || out_addrlen == NULL)
2633                 return 0;
2634
2635         if (un_addrlen == 0) {
2636                 *out_addrlen = 0;
2637                 return 0;
2638         }
2639
2640         switch (family) {
2641         case AF_INET:
2642 #ifdef HAVE_IPV6
2643         case AF_INET6:
2644 #endif
2645                 switch (si->type) {
2646                 case SOCK_STREAM:
2647                 case SOCK_DGRAM:
2648                         break;
2649                 default:
2650                         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown socket type!");
2651                         errno = ESOCKTNOSUPPORT;
2652                         return -1;
2653                 }
2654                 ret = convert_un_in(in_addr, out_addr, out_addrlen);
2655 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2656                 out_addr->sa_len = *out_addrlen;
2657 #endif
2658                 return ret;
2659         default:
2660                 break;
2661         }
2662
2663         SWRAP_LOG(SWRAP_LOG_ERROR, "Unknown address family");
2664         errno = EAFNOSUPPORT;
2665         return -1;
2666 }
2667
/*
 * Kinds of socket events distinguished by the packet code.
 *
 * The enumerators are numbered implicitly from 0 in the order listed;
 * the grouping below only follows the identifier prefixes.
 */
enum swrap_packet_type {
	/* SWRAP_CONNECT_* */
	SWRAP_CONNECT_SEND,
	SWRAP_CONNECT_UNREACH,
	SWRAP_CONNECT_RECV,
	SWRAP_CONNECT_ACK,

	/* SWRAP_ACCEPT_* */
	SWRAP_ACCEPT_SEND,
	SWRAP_ACCEPT_RECV,
	SWRAP_ACCEPT_ACK,

	/* recvfrom / sendto */
	SWRAP_RECVFROM,
	SWRAP_SENDTO,
	SWRAP_SENDTO_UNREACH,

	/* recv / send and their reset variants */
	SWRAP_PENDING_RST,
	SWRAP_RECV,
	SWRAP_RECV_RST,
	SWRAP_SEND,
	SWRAP_SEND_RST,

	/* SWRAP_CLOSE_* */
	SWRAP_CLOSE_SEND,
	SWRAP_CLOSE_RECV,
	SWRAP_CLOSE_ACK,
};
2688
/* Largest value used for swrap_file_hdr.frame_max_len. */
#define SWRAP_FRAME_LENGTH_MAX 0xFFFF

/*
 * Header written once at the start of a capture file.
 *
 * NOTE(review): the field set looks like the classic pcap global
 * header - confirm against the code that fills it in.
 */
struct swrap_file_hdr {
	uint32_t magic;
	uint16_t version_major;
	uint16_t version_minor;
	int32_t  timezone;
	uint32_t sigfigs;
	uint32_t frame_max_len;
	uint32_t link_type;
};

/* Expected sizeof(struct swrap_file_hdr) when no padding is inserted. */
#define SWRAP_FILE_HDR_SIZE 24
2700
/*
 * Per-packet record header: a timestamp split into seconds and
 * microseconds, followed by the recorded and the full packet length.
 */
struct swrap_packet_frame {
	uint32_t seconds;
	uint32_t micro_seconds;
	uint32_t recorded_length;
	uint32_t full_length;
};

/* Expected sizeof(struct swrap_packet_frame) when no padding is inserted. */
#define SWRAP_PACKET_FRAME_SIZE 16
2708
/* Expected sizes of the two header variants and of the whole union. */
#define SWRAP_PACKET_IP_V4_SIZE 20
#define SWRAP_PACKET_IP_V6_SIZE 40
#define SWRAP_PACKET_IP_SIZE 40

/*
 * IP header of a recorded packet, either in its IPv4 (v4) or its IPv6
 * (v6) layout.
 */
union swrap_packet_ip {
	/* IPv4 header fields */
	struct {
		uint8_t  ver_hdrlen;
		uint8_t  tos;
		uint16_t packet_length;
		uint16_t identification;
		uint8_t  flags;
		uint8_t  fragment;
		uint8_t  ttl;
		uint8_t  protocol;
		uint16_t hdr_checksum;
		uint32_t src_addr;
		uint32_t dest_addr;
	} v4;

	/* IPv6 header fields */
	struct {
		uint8_t  ver_prio;
		uint8_t  flow_label_high;
		uint16_t flow_label_low;
		uint16_t payload_length;
		uint8_t  next_header;
		uint8_t  hop_limit;
		uint8_t  src_addr[16];
		uint8_t  dest_addr[16];
	} v6;
};
2737
/* Expected sizes of the header variants and of the whole union. */
#define SWRAP_PACKET_PAYLOAD_TCP_SIZE 20
#define SWRAP_PACKET_PAYLOAD_UDP_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP4_SIZE 8
#define SWRAP_PACKET_PAYLOAD_ICMP6_SIZE 8
#define SWRAP_PACKET_PAYLOAD_SIZE 20

/*
 * Header that follows the IP header of a recorded packet: one of TCP,
 * UDP, ICMPv4 or ICMPv6.
 */
union swrap_packet_payload {
	/* TCP header fields */
	struct {
		uint16_t source_port;
		uint16_t dest_port;
		uint32_t seq_num;
		uint32_t ack_num;
		uint8_t  hdr_length;
		uint8_t  control;
		uint16_t window;
		uint16_t checksum;
		uint16_t urg;
	} tcp;

	/* UDP header fields */
	struct {
		uint16_t source_port;
		uint16_t dest_port;
		uint16_t length;
		uint16_t checksum;
	} udp;

	/* ICMPv4 header fields */
	struct {
		uint8_t  type;
		uint8_t  code;
		uint16_t checksum;
		uint32_t unused;
	} icmp4;

	/* ICMPv6 header fields */
	struct {
		uint8_t  type;
		uint8_t  code;
		uint16_t checksum;
		uint32_t unused;
	} icmp6;
};
2774
/* Combined size of the frame header, the IP header and the payload header. */
#define SWRAP_PACKET_MIN_ALLOC \
	(SWRAP_PACKET_FRAME_SIZE + SWRAP_PACKET_IP_SIZE + SWRAP_PACKET_PAYLOAD_SIZE)
2779
2780 static const char *swrap_pcap_init_file(void)
2781 {
2782         static int initialized = 0;
2783         static const char *s = NULL;
2784         static const struct swrap_file_hdr h;
2785         static const struct swrap_packet_frame f;
2786         static const union swrap_packet_ip i;
2787         static const union swrap_packet_payload p;
2788
2789         if (initialized == 1) {
2790                 return s;
2791         }
2792         initialized = 1;
2793
2794         /*
2795          * TODO: don't use the structs use plain buffer offsets
2796          *       and PUSH_U8(), PUSH_U16() and PUSH_U32()
2797          *
2798          * for now make sure we disable PCAP support
2799          * if the struct has alignment!
2800          */
2801         if (sizeof(h) != SWRAP_FILE_HDR_SIZE) {
2802                 return NULL;
2803         }
2804         if (sizeof(f) != SWRAP_PACKET_FRAME_SIZE) {
2805                 return NULL;
2806         }
2807         if (sizeof(i) != SWRAP_PACKET_IP_SIZE) {
2808                 return NULL;
2809         }
2810         if (sizeof(i.v4) != SWRAP_PACKET_IP_V4_SIZE) {
2811                 return NULL;
2812         }
2813         if (sizeof(i.v6) != SWRAP_PACKET_IP_V6_SIZE) {
2814                 return NULL;
2815         }
2816         if (sizeof(p) != SWRAP_PACKET_PAYLOAD_SIZE) {
2817                 return NULL;
2818         }
2819         if (sizeof(p.tcp) != SWRAP_PACKET_PAYLOAD_TCP_SIZE) {
2820                 return NULL;
2821         }
2822         if (sizeof(p.udp) != SWRAP_PACKET_PAYLOAD_UDP_SIZE) {
2823                 return NULL;
2824         }
2825         if (sizeof(p.icmp4) != SWRAP_PACKET_PAYLOAD_ICMP4_SIZE) {
2826                 return NULL;
2827         }
2828         if (sizeof(p.icmp6) != SWRAP_PACKET_PAYLOAD_ICMP6_SIZE) {
2829                 return NULL;
2830         }
2831
2832         s = getenv("SOCKET_WRAPPER_PCAP_FILE");
2833         if (s == NULL) {
2834                 return NULL;
2835         }
2836         if (strncmp(s, "./", 2) == 0) {
2837                 s += 2;
2838         }
2839         SWRAP_LOG(SWRAP_LOG_TRACE, "SOCKET_WRAPPER_PCAP_FILE: %s", s);
2840         return s;
2841 }
2842
2843 static uint8_t *swrap_pcap_packet_init(struct timeval *tval,
2844                                        const struct sockaddr *src,
2845                                        const struct sockaddr *dest,
2846                                        int socket_type,
2847                                        const uint8_t *payload,
2848                                        size_t payload_len,
2849                                        unsigned long tcp_seqno,
2850                                        unsigned long tcp_ack,
2851                                        unsigned char tcp_ctl,
2852                                        int unreachable,
2853                                        size_t *_packet_len)
2854 {
2855         uint8_t *base = NULL;
2856         uint8_t *buf = NULL;
2857         union {
2858                 uint8_t *ptr;
2859                 struct swrap_packet_frame *frame;
2860         } f;
2861         union {
2862                 uint8_t *ptr;
2863                 union swrap_packet_ip *ip;
2864         } i;
2865         union swrap_packet_payload *pay;
2866         size_t packet_len;
2867         size_t alloc_len;
2868         size_t nonwire_len = sizeof(struct swrap_packet_frame);
2869         size_t wire_hdr_len = 0;
2870         size_t wire_len = 0;
2871         size_t ip_hdr_len = 0;
2872         size_t icmp_hdr_len = 0;
2873         size_t icmp_truncate_len = 0;
2874         uint8_t protocol = 0, icmp_protocol = 0;
2875         const struct sockaddr_in *src_in = NULL;
2876         const struct sockaddr_in *dest_in = NULL;
2877 #ifdef HAVE_IPV6
2878         const struct sockaddr_in6 *src_in6 = NULL;
2879         const struct sockaddr_in6 *dest_in6 = NULL;
2880 #endif
2881         uint16_t src_port;
2882         uint16_t dest_port;
2883
2884         switch (src->sa_family) {
2885         case AF_INET:
2886                 src_in = (const struct sockaddr_in *)(const void *)src;
2887                 dest_in = (const struct sockaddr_in *)(const void *)dest;
2888                 src_port = src_in->sin_port;
2889                 dest_port = dest_in->sin_port;
2890                 ip_hdr_len = sizeof(i.ip->v4);
2891                 break;
2892 #ifdef HAVE_IPV6
2893         case AF_INET6:
2894                 src_in6 = (const struct sockaddr_in6 *)(const void *)src;
2895                 dest_in6 = (const struct sockaddr_in6 *)(const void *)dest;
2896                 src_port = src_in6->sin6_port;
2897                 dest_port = dest_in6->sin6_port;
2898                 ip_hdr_len = sizeof(i.ip->v6);
2899                 break;
2900 #endif
2901         default:
2902                 return NULL;
2903         }
2904
2905         switch (socket_type) {
2906         case SOCK_STREAM:
2907                 protocol = 0x06; /* TCP */
2908                 wire_hdr_len = ip_hdr_len + sizeof(pay->tcp);
2909                 wire_len = wire_hdr_len + payload_len;
2910                 break;
2911
2912         case SOCK_DGRAM:
2913                 protocol = 0x11; /* UDP */
2914                 wire_hdr_len = ip_hdr_len + sizeof(pay->udp);
2915                 wire_len = wire_hdr_len + payload_len;
2916                 break;
2917
2918         default:
2919                 return NULL;
2920         }
2921
2922         if (unreachable) {
2923                 icmp_protocol = protocol;
2924                 switch (src->sa_family) {
2925                 case AF_INET:
2926                         protocol = 0x01; /* ICMPv4 */
2927                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp4);
2928                         break;
2929 #ifdef HAVE_IPV6
2930                 case AF_INET6:
2931                         protocol = 0x3A; /* ICMPv6 */
2932                         icmp_hdr_len = ip_hdr_len + sizeof(pay->icmp6);
2933                         break;
2934 #endif
2935                 }
2936                 if (wire_len > 64 ) {
2937                         icmp_truncate_len = wire_len - 64;
2938                 }
2939                 wire_len += icmp_hdr_len;
2940         }
2941
2942         packet_len = nonwire_len + wire_len;
2943         alloc_len = packet_len;
2944         if (alloc_len < SWRAP_PACKET_MIN_ALLOC) {
2945                 alloc_len = SWRAP_PACKET_MIN_ALLOC;
2946         }
2947
2948         base = (uint8_t *)calloc(1, alloc_len);
2949         if (base == NULL) {
2950                 return NULL;
2951         }
2952
2953         buf = base;
2954         f.ptr = buf;
2955
2956         f.frame->seconds                = tval->tv_sec;
2957         f.frame->micro_seconds  = tval->tv_usec;
2958         f.frame->recorded_length        = wire_len - icmp_truncate_len;
2959         f.frame->full_length    = wire_len - icmp_truncate_len;
2960
2961         buf += SWRAP_PACKET_FRAME_SIZE;
2962
2963         i.ptr = buf;
2964         switch (src->sa_family) {
2965         case AF_INET:
2966                 if (src_in == NULL || dest_in == NULL) {
2967                         SAFE_FREE(base);
2968                         return NULL;
2969                 }
2970
2971                 i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
2972                 i.ip->v4.tos            = 0x00;
2973                 i.ip->v4.packet_length  = htons(wire_len - icmp_truncate_len);
2974                 i.ip->v4.identification = htons(0xFFFF);
2975                 i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
2976                 i.ip->v4.fragment       = htons(0x0000);
2977                 i.ip->v4.ttl            = 0xFF;
2978                 i.ip->v4.protocol       = protocol;
2979                 i.ip->v4.hdr_checksum   = htons(0x0000);
2980                 i.ip->v4.src_addr       = src_in->sin_addr.s_addr;
2981                 i.ip->v4.dest_addr      = dest_in->sin_addr.s_addr;
2982                 buf += SWRAP_PACKET_IP_V4_SIZE;
2983                 break;
2984 #ifdef HAVE_IPV6
2985         case AF_INET6:
2986                 if (src_in6 == NULL || dest_in6 == NULL) {
2987                         SAFE_FREE(base);
2988                         return NULL;
2989                 }
2990
2991                 i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
2992                 i.ip->v6.flow_label_high        = 0x00;
2993                 i.ip->v6.flow_label_low = 0x0000;
2994                 i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
2995                 i.ip->v6.next_header    = protocol;
2996                 memcpy(i.ip->v6.src_addr, src_in6->sin6_addr.s6_addr, 16);
2997                 memcpy(i.ip->v6.dest_addr, dest_in6->sin6_addr.s6_addr, 16);
2998                 buf += SWRAP_PACKET_IP_V6_SIZE;
2999                 break;
3000 #endif
3001         }
3002
3003         if (unreachable) {
3004                 pay = (union swrap_packet_payload *)(void *)buf;
3005                 switch (src->sa_family) {
3006                 case AF_INET:
3007                         pay->icmp4.type         = 0x03; /* destination unreachable */
3008                         pay->icmp4.code         = 0x01; /* host unreachable */
3009                         pay->icmp4.checksum     = htons(0x0000);
3010                         pay->icmp4.unused       = htonl(0x00000000);
3011
3012                         buf += SWRAP_PACKET_PAYLOAD_ICMP4_SIZE;
3013
3014                         /* set the ip header in the ICMP payload */
3015                         i.ptr = buf;
3016                         i.ip->v4.ver_hdrlen     = 0x45; /* version 4 and 5 * 32 bit words */
3017                         i.ip->v4.tos            = 0x00;
3018                         i.ip->v4.packet_length  = htons(wire_len - icmp_hdr_len);
3019                         i.ip->v4.identification = htons(0xFFFF);
3020                         i.ip->v4.flags          = 0x40; /* BIT 1 set - means don't fragment */
3021                         i.ip->v4.fragment       = htons(0x0000);
3022                         i.ip->v4.ttl            = 0xFF;
3023                         i.ip->v4.protocol       = icmp_protocol;
3024                         i.ip->v4.hdr_checksum   = htons(0x0000);
3025                         i.ip->v4.src_addr       = dest_in->sin_addr.s_addr;
3026                         i.ip->v4.dest_addr      = src_in->sin_addr.s_addr;
3027
3028                         buf += SWRAP_PACKET_IP_V4_SIZE;
3029
3030                         src_port = dest_in->sin_port;
3031                         dest_port = src_in->sin_port;
3032                         break;
3033 #ifdef HAVE_IPV6
3034                 case AF_INET6:
3035                         pay->icmp6.type         = 0x01; /* destination unreachable */
3036                         pay->icmp6.code         = 0x03; /* address unreachable */
3037                         pay->icmp6.checksum     = htons(0x0000);
3038                         pay->icmp6.unused       = htonl(0x00000000);
3039                         buf += SWRAP_PACKET_PAYLOAD_ICMP6_SIZE;
3040
3041                         /* set the ip header in the ICMP payload */
3042                         i.ptr = buf;
3043                         i.ip->v6.ver_prio               = 0x60; /* version 4 and 5 * 32 bit words */
3044                         i.ip->v6.flow_label_high        = 0x00;
3045                         i.ip->v6.flow_label_low = 0x0000;
3046                         i.ip->v6.payload_length = htons(wire_len - icmp_truncate_len); /* TODO */
3047                         i.ip->v6.next_header    = protocol;
3048                         memcpy(i.ip->v6.src_addr, dest_in6->sin6_addr.s6_addr, 16);
3049                         memcpy(i.ip->v6.dest_addr, src_in6->sin6_addr.s6_addr, 16);
3050
3051                         buf += SWRAP_PACKET_IP_V6_SIZE;
3052
3053                         src_port = dest_in6->sin6_port;
3054                         dest_port = src_in6->sin6_port;
3055                         break;
3056 #endif
3057                 }
3058         }
3059
3060         pay = (union swrap_packet_payload *)(void *)buf;
3061
3062         switch (socket_type) {
3063         case SOCK_STREAM:
3064                 pay->tcp.source_port    = src_port;
3065                 pay->tcp.dest_port      = dest_port;
3066                 pay->tcp.seq_num        = htonl(tcp_seqno);
3067                 pay->tcp.ack_num        = htonl(tcp_ack);
3068                 pay->tcp.hdr_length     = 0x50; /* 5 * 32 bit words */
3069                 pay->tcp.control        = tcp_ctl;
3070                 pay->tcp.window         = htons(0x7FFF);
3071                 pay->tcp.checksum       = htons(0x0000);
3072                 pay->tcp.urg            = htons(0x0000);
3073                 buf += SWRAP_PACKET_PAYLOAD_TCP_SIZE;
3074
3075                 break;
3076
3077         case SOCK_DGRAM:
3078                 pay->udp.source_port    = src_port;
3079                 pay->udp.dest_port      = dest_port;
3080                 pay->udp.length         = htons(8 + payload_len);
3081                 pay->udp.checksum       = htons(0x0000);
3082                 buf += SWRAP_PACKET_PAYLOAD_UDP_SIZE;
3083
3084                 break;
3085         }
3086
3087         if (payload && payload_len > 0) {
3088                 memcpy(buf, payload, payload_len);
3089         }
3090
3091         *_packet_len = packet_len - icmp_truncate_len;
3092         return base;
3093 }
3094
3095 static int swrap_pcap_get_fd(const char *fname)
3096 {
3097         static int fd = -1;
3098
3099         if (fd != -1) {
3100                 return fd;
3101         }
3102
3103         fd = libc_open(fname, O_WRONLY|O_CREAT|O_EXCL|O_APPEND, 0644);
3104         if (fd != -1) {
3105                 struct swrap_file_hdr file_hdr;
3106                 file_hdr.magic          = 0xA1B2C3D4;
3107                 file_hdr.version_major  = 0x0002;
3108                 file_hdr.version_minor  = 0x0004;
3109                 file_hdr.timezone       = 0x00000000;
3110                 file_hdr.sigfigs        = 0x00000000;
3111                 file_hdr.frame_max_len  = SWRAP_FRAME_LENGTH_MAX;
3112                 file_hdr.link_type      = 0x0065; /* 101 RAW IP */
3113
3114                 if (libc_write(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
3115                         libc_close(fd);
3116                         fd = -1;
3117                 }
3118                 return fd;
3119         }
3120
3121         fd = libc_open(fname, O_WRONLY|O_APPEND, 0644);
3122
3123         return fd;
3124 }
3125
3126 static uint8_t *swrap_pcap_marshall_packet(struct socket_info *si,
3127                                            const struct sockaddr *addr,
3128                                            enum swrap_packet_type type,
3129                                            const void *buf, size_t len,
3130                                            size_t *packet_len)
3131 {
3132         const struct sockaddr *src_addr;
3133         const struct sockaddr *dest_addr;
3134         unsigned long tcp_seqno = 0;
3135         unsigned long tcp_ack = 0;
3136         unsigned char tcp_ctl = 0;
3137         int unreachable = 0;
3138
3139         struct timeval tv;
3140
3141         switch (si->family) {
3142         case AF_INET:
3143                 break;
3144 #ifdef HAVE_IPV6
3145         case AF_INET6:
3146                 break;
3147 #endif
3148         default:
3149                 return NULL;
3150         }
3151
3152         switch (type) {
3153         case SWRAP_CONNECT_SEND:
3154                 if (si->type != SOCK_STREAM) {
3155                         return NULL;
3156                 }
3157
3158                 src_addr  = &si->myname.sa.s;
3159                 dest_addr = addr;
3160
3161                 tcp_seqno = si->io.pck_snd;
3162                 tcp_ack = si->io.pck_rcv;
3163                 tcp_ctl = 0x02; /* SYN */
3164
3165                 si->io.pck_snd += 1;
3166
3167                 break;
3168
3169         case SWRAP_CONNECT_RECV:
3170                 if (si->type != SOCK_STREAM) {
3171                         return NULL;
3172                 }
3173
3174                 dest_addr = &si->myname.sa.s;
3175                 src_addr = addr;
3176
3177                 tcp_seqno = si->io.pck_rcv;
3178                 tcp_ack = si->io.pck_snd;
3179                 tcp_ctl = 0x12; /** SYN,ACK */
3180
3181                 si->io.pck_rcv += 1;
3182
3183                 break;
3184
3185         case SWRAP_CONNECT_UNREACH:
3186                 if (si->type != SOCK_STREAM) {
3187                         return NULL;
3188                 }
3189
3190                 dest_addr = &si->myname.sa.s;
3191                 src_addr  = addr;
3192
3193                 /* Unreachable: resend the data of SWRAP_CONNECT_SEND */
3194                 tcp_seqno = si->io.pck_snd - 1;
3195                 tcp_ack = si->io.pck_rcv;
3196                 tcp_ctl = 0x02; /* SYN */
3197                 unreachable = 1;
3198
3199                 break;
3200
3201         case SWRAP_CONNECT_ACK:
3202                 if (si->type != SOCK_STREAM) {
3203                         return NULL;
3204                 }
3205
3206                 src_addr  = &si->myname.sa.s;
3207                 dest_addr = addr;
3208
3209                 tcp_seqno = si->io.pck_snd;
3210                 tcp_ack = si->io.pck_rcv;
3211                 tcp_ctl = 0x10; /* ACK */
3212
3213                 break;
3214
3215         case SWRAP_ACCEPT_SEND:
3216                 if (si->type != SOCK_STREAM) {
3217                         return NULL;
3218                 }
3219
3220                 dest_addr = &si->myname.sa.s;
3221                 src_addr = addr;
3222
3223                 tcp_seqno = si->io.pck_rcv;
3224                 tcp_ack = si->io.pck_snd;
3225                 tcp_ctl = 0x02; /* SYN */
3226
3227                 si->io.pck_rcv += 1;
3228
3229                 break;
3230
3231         case SWRAP_ACCEPT_RECV:
3232                 if (si->type != SOCK_STREAM) {
3233                         return NULL;
3234                 }
3235
3236                 src_addr = &si->myname.sa.s;
3237                 dest_addr = addr;
3238
3239                 tcp_seqno = si->io.pck_snd;
3240                 tcp_ack = si->io.pck_rcv;
3241                 tcp_ctl = 0x12; /* SYN,ACK */
3242
3243                 si->io.pck_snd += 1;
3244
3245                 break;
3246
3247         case SWRAP_ACCEPT_ACK:
3248                 if (si->type != SOCK_STREAM) {
3249                         return NULL;
3250                 }
3251
3252                 dest_addr = &si->myname.sa.s;
3253                 src_addr = addr;
3254
3255                 tcp_seqno = si->io.pck_rcv;
3256                 tcp_ack = si->io.pck_snd;
3257                 tcp_ctl = 0x10; /* ACK */
3258
3259                 break;
3260
3261         case SWRAP_SEND:
3262                 src_addr  = &si->myname.sa.s;
3263                 dest_addr = &si->peername.sa.s;
3264
3265                 tcp_seqno = si->io.pck_snd;
3266                 tcp_ack = si->io.pck_rcv;
3267                 tcp_ctl = 0x18; /* PSH,ACK */
3268
3269                 si->io.pck_snd += len;
3270
3271                 break;
3272
3273         case SWRAP_SEND_RST:
3274                 dest_addr = &si->myname.sa.s;
3275                 src_addr  = &si->peername.sa.s;
3276
3277                 if (si->type == SOCK_DGRAM) {
3278                         return swrap_pcap_marshall_packet(si,
3279                                                           &si->peername.sa.s,
3280                                                           SWRAP_SENDTO_UNREACH,
3281                                                           buf,
3282                                                           len,
3283                                                           packet_len);
3284                 }
3285
3286                 tcp_seqno = si->io.pck_rcv;
3287                 tcp_ack = si->io.pck_snd;
3288                 tcp_ctl = 0x14; /** RST,ACK */
3289
3290                 break;
3291
3292         case SWRAP_PENDING_RST:
3293                 dest_addr = &si->myname.sa.s;
3294                 src_addr  = &si->peername.sa.s;
3295
3296                 if (si->type == SOCK_DGRAM) {
3297                         return NULL;
3298                 }
3299
3300                 tcp_seqno = si->io.pck_rcv;
3301                 tcp_ack = si->io.pck_snd;
3302                 tcp_ctl = 0x14; /* RST,ACK */
3303
3304                 break;
3305
3306         case SWRAP_RECV:
3307                 dest_addr = &si->myname.sa.s;
3308                 src_addr  = &si->peername.sa.s;
3309
3310                 tcp_seqno = si->io.pck_rcv;
3311                 tcp_ack = si->io.pck_snd;
3312                 tcp_ctl = 0x18; /* PSH,ACK */
3313
3314                 si->io.pck_rcv += len;
3315
3316                 break;
3317
3318         case SWRAP_RECV_RST:
3319                 dest_addr = &si->myname.sa.s;
3320                 src_addr  = &si->peername.sa.s;
3321
3322                 if (si->type == SOCK_DGRAM) {
3323                         return NULL;
3324                 }
3325
3326                 tcp_seqno = si->io.pck_rcv;
3327                 tcp_ack = si->io.pck_snd;
3328                 tcp_ctl = 0x14; /* RST,ACK */
3329
3330                 break;
3331
3332         case SWRAP_SENDTO:
3333                 src_addr = &si->myname.sa.s;
3334                 dest_addr = addr;
3335
3336                 si->io.pck_snd += len;
3337
3338                 break;
3339
3340         case SWRAP_SENDTO_UNREACH:
3341                 dest_addr = &si->myname.sa.s;
3342                 src_addr = addr;
3343
3344                 unreachable = 1;
3345
3346                 break;
3347
3348         case SWRAP_RECVFROM:
3349                 dest_addr = &si->myname.sa.s;
3350                 src_addr = addr;
3351
3352                 si->io.pck_rcv += len;
3353
3354                 break;
3355
3356         case SWRAP_CLOSE_SEND:
3357                 if (si->type != SOCK_STREAM) {
3358                         return NULL;
3359                 }
3360
3361                 src_addr  = &si->myname.sa.s;
3362                 dest_addr = &si->peername.sa.s;
3363
3364                 tcp_seqno = si->io.pck_snd;
3365                 tcp_ack = si->io.pck_rcv;
3366                 tcp_ctl = 0x11; /* FIN, ACK */
3367
3368                 si->io.pck_snd += 1;
3369
3370                 break;
3371
3372         case SWRAP_CLOSE_RECV:
3373                 if (si->type != SOCK_STREAM) {
3374                         return NULL;
3375                 }
3376
3377                 dest_addr = &si->myname.sa.s;
3378                 src_addr  = &si->peername.sa.s;
3379
3380                 tcp_seqno = si->io.pck_rcv;
3381                 tcp_ack = si->io.pck_snd;
3382                 tcp_ctl = 0x11; /* FIN,ACK */
3383
3384                 si->io.pck_rcv += 1;
3385
3386                 break;
3387
3388         case SWRAP_CLOSE_ACK:
3389                 if (si->type != SOCK_STREAM) {
3390                         return NULL;
3391                 }
3392
3393                 src_addr  = &si->myname.sa.s;
3394                 dest_addr = &si->peername.sa.s;
3395
3396                 tcp_seqno = si->io.pck_snd;
3397                 tcp_ack = si->io.pck_rcv;
3398                 tcp_ctl = 0x10; /* ACK */
3399
3400                 break;
3401         default:
3402                 return NULL;
3403         }
3404
3405         swrapGetTimeOfDay(&tv);
3406
3407         return swrap_pcap_packet_init(&tv,
3408                                       src_addr,
3409                                       dest_addr,
3410                                       si->type,
3411                                       (const uint8_t *)buf,
3412                                       len,
3413                                       tcp_seqno,
3414                                       tcp_ack,
3415                                       tcp_ctl,
3416                                       unreachable,
3417                                       packet_len);
3418 }
3419
3420 static void swrap_pcap_dump_packet(struct socket_info *si,
3421                                    const struct sockaddr *addr,
3422                                    enum swrap_packet_type type,
3423                                    const void *buf, size_t len)
3424 {
3425         const char *file_name;
3426         uint8_t *packet;
3427         size_t packet_len = 0;
3428         int fd;
3429
3430         swrap_mutex_lock(&pcap_dump_mutex);
3431
3432         file_name = swrap_pcap_init_file();
3433         if (!file_name) {
3434                 goto done;
3435         }
3436
3437         packet = swrap_pcap_marshall_packet(si,
3438                                             addr,
3439                                             type,
3440                                             buf,
3441                                             len,
3442                                             &packet_len);
3443         if (packet == NULL) {
3444                 goto done;
3445         }
3446
3447         fd = swrap_pcap_get_fd(file_name);
3448         if (fd != -1) {
3449                 if (libc_write(fd, packet, packet_len) != (ssize_t)packet_len) {
3450                         free(packet);
3451                         goto done;
3452                 }
3453         }
3454
3455         free(packet);
3456
3457 done:
3458         swrap_mutex_unlock(&pcap_dump_mutex);
3459 }
3460
3461 /****************************************************************************
3462  *   SIGNALFD
3463  ***************************************************************************/
3464
3465 #ifdef HAVE_SIGNALFD
3466 static int swrap_signalfd(int fd, const sigset_t *mask, int flags)
3467 {
3468         int rc;
3469
3470         rc = libc_signalfd(fd, mask, flags);
3471         if (rc != -1) {
3472                 swrap_remove_stale(fd);
3473         }
3474
3475         return rc;
3476 }
3477
3478 int signalfd(int fd, const sigset_t *mask, int flags)
3479 {
3480         return swrap_signalfd(fd, mask, flags);
3481 }
3482 #endif
3483
3484 /****************************************************************************
3485  *   SOCKET
3486  ***************************************************************************/
3487
3488 static int swrap_socket(int family, int type, int protocol)
3489 {
3490         struct socket_info *si = NULL;
3491         struct socket_info _si = { 0 };
3492         int fd;
3493         int ret;
3494         int real_type = type;
3495
3496         /*
3497          * Remove possible addition flags passed to socket() so
3498          * do not fail checking the type.
3499          * See https://lwn.net/Articles/281965/
3500          */
3501 #ifdef SOCK_CLOEXEC
3502         real_type &= ~SOCK_CLOEXEC;
3503 #endif
3504 #ifdef SOCK_NONBLOCK
3505         real_type &= ~SOCK_NONBLOCK;
3506 #endif
3507
3508         if (!socket_wrapper_enabled()) {
3509                 return libc_socket(family, type, protocol);
3510         }
3511
3512         switch (family) {
3513         case AF_INET:
3514 #ifdef HAVE_IPV6
3515         case AF_INET6:
3516 #endif
3517                 break;
3518 #ifdef AF_NETLINK
3519         case AF_NETLINK:
3520 #endif /* AF_NETLINK */
3521 #ifdef AF_PACKET
3522         case AF_PACKET:
3523 #endif /* AF_PACKET */
3524         case AF_UNIX:
3525                 fd = libc_socket(family, type, protocol);
3526                 if (fd != -1) {
3527                         /* Check if we have a stale fd and remove it */
3528                         swrap_remove_stale(fd);
3529                         SWRAP_LOG(SWRAP_LOG_TRACE,
3530                                   "Unix socket fd=%d",
3531                                   fd);
3532                 }
3533                 return fd;
3534         default:
3535                 errno = EAFNOSUPPORT;
3536                 return -1;
3537         }
3538
3539         switch (real_type) {
3540         case SOCK_STREAM:
3541                 break;
3542         case SOCK_DGRAM:
3543                 break;
3544         default:
3545                 errno = EPROTONOSUPPORT;
3546                 return -1;
3547         }
3548
3549         switch (protocol) {
3550         case 0:
3551                 break;
3552         case 6:
3553                 if (real_type == SOCK_STREAM) {
3554                         break;
3555                 }
3556                 FALL_THROUGH;
3557         case 17:
3558                 if (real_type == SOCK_DGRAM) {
3559                         break;
3560                 }
3561                 FALL_THROUGH;
3562         default:
3563                 errno = EPROTONOSUPPORT;
3564                 return -1;
3565         }
3566
3567         /*
3568          * We must call libc_socket with type, from the caller, not the version
3569          * we removed SOCK_CLOEXEC and SOCK_NONBLOCK from
3570          */
3571         fd = libc_socket(AF_UNIX, type, 0);
3572
3573         if (fd == -1) {
3574                 return -1;
3575         }
3576
3577         /* Check if we have a stale fd and remove it */
3578         swrap_remove_stale(fd);
3579
3580         si = &_si;
3581         si->family = family;
3582
3583         /* however, the rest of the socket_wrapper code expects just
3584          * the type, not the flags */
3585         si->type = real_type;
3586         si->protocol = protocol;
3587
3588         /*
3589          * Setup myname so getsockname() can succeed to find out the socket
3590          * type.
3591          */
3592         switch(si->family) {
3593         case AF_INET: {
3594                 struct sockaddr_in sin = {
3595                         .sin_family = AF_INET,
3596                 };
3597
3598                 si->myname.sa_socklen = sizeof(struct sockaddr_in);
3599                 memcpy(&si->myname.sa.in, &sin, si->myname.sa_socklen);
3600                 break;
3601         }
3602 #ifdef HAVE_IPV6
3603         case AF_INET6: {
3604                 struct sockaddr_in6 sin6 = {
3605                         .sin6_family = AF_INET6,
3606                 };
3607
3608                 si->myname.sa_socklen = sizeof(struct sockaddr_in6);
3609                 memcpy(&si->myname.sa.in6, &sin6, si->myname.sa_socklen);
3610                 break;
3611         }
3612 #endif
3613         default:
3614                 errno = EINVAL;
3615                 return -1;
3616         }
3617
3618         ret = swrap_create_socket(si, fd);
3619         if (ret == -1) {
3620                 int saved_errno = errno;
3621                 libc_close(fd);
3622                 errno = saved_errno;
3623                 return -1;
3624         }
3625
3626         SWRAP_LOG(SWRAP_LOG_TRACE,
3627                   "Created %s socket for protocol %s, fd=%d",
3628                   family == AF_INET ? "IPv4" : "IPv6",
3629                   real_type == SOCK_DGRAM ? "UDP" : "TCP",
3630                   fd);
3631
3632         return fd;
3633 }
3634
/*
 * socket() as provided by this library; all work is done by
 * swrap_socket().
 */
int socket(int family, int type, int protocol)
{
        int fd = swrap_socket(family, type, protocol);

        return fd;
}
3639
3640 /****************************************************************************
3641  *   SOCKETPAIR
3642  ***************************************************************************/
3643
/*
 * Call the real socketpair() and, on success, check both new descriptors
 * for stale entries and remove them.
 *
 * Returns what libc_socketpair() returned.
 */
static int swrap_socketpair(int family, int type, int protocol, int sv[2])
{
        int rc = libc_socketpair(family, type, protocol, sv);

        if (rc == -1) {
                return rc;
        }

        swrap_remove_stale(sv[0]);
        swrap_remove_stale(sv[1]);

        return rc;
}
3656
/*
 * socketpair() as provided by this library; all work is done by
 * swrap_socketpair().
 */
int socketpair(int family, int type, int protocol, int sv[2])
{
        int rc = swrap_socketpair(family, type, protocol, sv);

        return rc;
}
3661
3662 /****************************************************************************
3663  *   TIMERFD_CREATE
3664  ***************************************************************************/
3665
3666 #ifdef HAVE_TIMERFD_CREATE
/*
 * Call the real timerfd_create() and, on success, check the new descriptor
 * for a stale entry and remove it.
 *
 * Returns what libc_timerfd_create() returned.
 */
static int swrap_timerfd_create(int clockid, int flags)
{
        int new_fd = libc_timerfd_create(clockid, flags);

        if (new_fd == -1) {
                return new_fd;
        }

        swrap_remove_stale(new_fd);

        return new_fd;
}
3678
/*
 * timerfd_create() as provided by this library; all work is done by
 * swrap_timerfd_create().
 */
int timerfd_create(int clockid, int flags)
{
        int fd = swrap_timerfd_create(clockid, flags);

        return fd;
}
3683 #endif
3684
3685 /****************************************************************************
3686  *   PIPE
3687  ***************************************************************************/
3688
/*
 * Call the real pipe() and, on success, check both ends of the new pipe
 * for stale entries and remove them.
 *
 * Returns what libc_pipe() returned.
 */
static int swrap_pipe(int pipefd[2])
{
        int rc = libc_pipe(pipefd);

        if (rc == -1) {
                return rc;
        }

        swrap_remove_stale(pipefd[0]);
        swrap_remove_stale(pipefd[1]);

        return rc;
}
3701
/*
 * pipe() as provided by this library; all work is done by swrap_pipe().
 */
int pipe(int pipefd[2])
{
        int rc = swrap_pipe(pipefd);

        return rc;
}
3706
3707 /****************************************************************************
3708  *   ACCEPT
3709  ***************************************************************************/
3710
3711 static int swrap_accept(int s,
3712                         struct sockaddr *addr,
3713                         socklen_t *addrlen,
3714                         int flags)
3715 {
3716         struct socket_info *parent_si, *child_si;
3717         struct socket_info new_si = { 0 };
3718         int fd;
3719         int idx;
3720         struct swrap_address un_addr = {
3721                 .sa_socklen = sizeof(struct sockaddr_un),
3722         };
3723         struct swrap_address un_my_addr = {
3724                 .sa_socklen = sizeof(struct sockaddr_un),
3725         };
3726         struct swrap_address in_addr = {
3727                 .sa_socklen = sizeof(struct sockaddr_storage),
3728         };
3729         struct swrap_address in_my_addr = {
3730                 .sa_socklen = sizeof(struct sockaddr_storage),
3731         };
3732         int ret;
3733
3734         parent_si = find_socket_info(s);
3735         if (!parent_si) {
3736 #ifdef HAVE_ACCEPT4
3737                 return libc_accept4(s, addr, addrlen, flags);
3738 #else
3739                 UNUSED(flags);
3740                 return libc_accept(s, addr, addrlen);
3741 #endif
3742         }
3743
3744
3745         /*
3746          * prevent parent_si from being altered / closed
3747          * while we read it
3748          */
3749         SWRAP_LOCK_SI(parent_si);
3750
3751         /*
3752          * assume out sockaddr have the same size as the in parent
3753          * socket family
3754          */
3755         in_addr.sa_socklen = socket_length(parent_si->family);
3756         if (in_addr.sa_socklen <= 0) {
3757                 SWRAP_UNLOCK_SI(parent_si);
3758                 errno = EINVAL;
3759                 return -1;
3760         }
3761
3762         SWRAP_UNLOCK_SI(parent_si);
3763
3764 #ifdef HAVE_ACCEPT4
3765         ret = libc_accept4(s, &un_addr.sa.s, &un_addr.sa_socklen, flags);
3766 #else
3767         UNUSED(flags);
3768         ret = libc_accept(s, &un_addr.sa.s, &un_addr.sa_socklen);
3769 #endif
3770         if (ret == -1) {
3771                 int saved_errno = errno;
3772                 if (saved_errno == ENOTSOCK) {
3773                         /* Remove stale fds */
3774                         swrap_remove_stale(s);
3775                 }
3776                 errno = saved_errno;
3777                 return ret;
3778         }
3779
3780         fd = ret;
3781
3782         /* Check if we have a stale fd and remove it */
3783         swrap_remove_stale(fd);
3784
3785         if (un_addr.sa.un.sun_path[0] == '\0') {
3786                 /*
3787                  * FreeBSD seems to have a problem where
3788                  * accept4() on the unix socket doesn't
3789                  * ECONNABORTED for already disconnected connections.
3790                  *
3791                  * Let's try libc_getpeername() to get the peer address
3792                  * as a fallback, but it'll likely return ENOTCONN,
3793                  * which we have to map to ECONNABORTED.
3794                  */
3795                 un_addr.sa_socklen = sizeof(struct sockaddr_un),
3796                 ret = libc_getpeername(fd, &un_addr.sa.s, &un_addr.sa_socklen);
3797                 if (ret == -1) {
3798                         int saved_errno = errno;
3799                         libc_close(fd);
3800                         if (saved_errno == ENOTCONN) {
3801                                 /*
3802                                  * If the connection is already disconnected
3803                                  * we should return ECONNABORTED.
3804                                  */
3805                                 saved_errno = ECONNABORTED;
3806                         }
3807                         errno = saved_errno;
3808                         return ret;
3809                 }
3810         }
3811
3812         ret = libc_getsockname(fd,
3813                                &un_my_addr.sa.s,
3814                                &un_my_addr.sa_socklen);
3815         if (ret == -1) {
3816                 int saved_errno = errno;
3817                 libc_close(fd);
3818                 if (saved_errno == ENOTCONN) {
3819                         /*
3820                          * If the connection is already disconnected
3821                          * we should return ECONNABORTED.
3822                          */
3823                         saved_errno = ECONNABORTED;
3824                 }
3825                 errno = saved_errno;
3826                 return ret;
3827         }
3828
3829         SWRAP_LOCK_SI(parent_si);
3830
3831         ret = sockaddr_convert_from_un(parent_si,
3832                                        &un_addr.sa.un,
3833                                        un_addr.sa_socklen,
3834                                        parent_si->family,
3835                                        &in_addr.sa.s,
3836                                        &in_addr.sa_socklen);
3837         if (ret == -1) {
3838                 int saved_errno = errno;
3839                 SWRAP_UNLOCK_SI(parent_si);
3840                 libc_close(fd);
3841                 errno = saved_errno;
3842                 return ret;
3843         }
3844
3845         child_si = &new_si;
3846
3847         child_si->family = parent_si->family;
3848         child_si->type = parent_si->type;
3849         child_si->protocol = parent_si->protocol;
3850         child_si->bound = 1;
3851         child_si->is_server = 1;
3852         child_si->connected = 1;
3853
3854         SWRAP_UNLOCK_SI(parent_si);
3855
3856         child_si->peername = (struct swrap_address) {
3857                 .sa_socklen = in_addr.sa_socklen,
3858         };
3859         memcpy(&child_si->peername.sa.ss, &in_addr.sa.ss, in_addr.sa_socklen);
3860
3861         if (addr != NULL && addrlen != NULL) {
3862                 size_t copy_len = MIN(*addrlen, in_addr.sa_socklen);
3863                 if (copy_len > 0) {
3864                         memcpy(addr, &in_addr.sa.ss, copy_len);
3865                 }
3866                 *addrlen = in_addr.sa_socklen;
3867         }
3868
3869         ret = sockaddr_convert_from_un(child_si,
3870                                        &un_my_addr.sa.un,
3871                                        un_my_addr.sa_socklen,
3872                                        child_si->family,
3873                                        &in_my_addr.sa.s,
3874                                        &in_my_addr.sa_socklen);
3875         if (ret == -1) {
3876                 int saved_errno = errno;
3877                 libc_close(fd);
3878                 errno = saved_errno;
3879                 return ret;
3880         }
3881
3882         SWRAP_LOG(SWRAP_LOG_TRACE,
3883                   "accept() path=%s, fd=%d",
3884                   un_my_addr.sa.un.sun_path, s);
3885
3886         child_si->myname = (struct swrap_address) {
3887                 .sa_socklen = in_my_addr.sa_socklen,
3888         };
3889         memcpy(&child_si->myname.sa.ss, &in_my_addr.sa.ss, in_my_addr.sa_socklen);
3890
3891         idx = swrap_create_socket(&new_si, fd);
3892         if (idx == -1) {
3893                 int saved_errno = errno;
3894                 libc_close(fd);
3895                 errno = saved_errno;
3896                 return -1;
3897         }
3898
3899         if (addr != NULL) {
3900                 struct socket_info *si = swrap_get_socket_info(idx);
3901
3902                 SWRAP_LOCK_SI(si);
3903                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_SEND, NULL, 0);
3904                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_RECV, NULL, 0);
3905                 swrap_pcap_dump_packet(si, addr, SWRAP_ACCEPT_ACK, NULL, 0);
3906                 SWRAP_UNLOCK_SI(si);
3907         }
3908
3909         return fd;
3910 }
3911
3912 #ifdef HAVE_ACCEPT4
/*
 * Public accept4() entry point; forwards to swrap_accept() with the
 * caller's flags.
 */
int accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags)
{
	int fd = swrap_accept(s, addr, addrlen, flags);

	return fd;
}
3917 #endif
3918
/*
 * Public accept() entry point; forwards to swrap_accept() with flags 0.
 * The prototype depends on HAVE_ACCEPT_PSOCKLEN_T.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int accept(int s, struct sockaddr *addr, Psocklen_t addrlen)
#else
int accept(int s, struct sockaddr *addr, socklen_t *addrlen)
#endif
{
	socklen_t *len_ptr = (socklen_t *)addrlen;

	return swrap_accept(s, addr, len_ptr, 0);
}
3927
3928 static int autobind_start_init;
3929 static int autobind_start;
3930
3931 /* using sendto() or connect() on an unbound socket would give the
3932    recipient no way to reply, as unlike UDP and TCP, a unix domain
3933    socket can't auto-assign ephemeral port numbers, so we need to
3934    assign it here.
3935    Note: this might change the family from ipv6 to ipv4
3936 */
3937 static int swrap_auto_bind(int fd, struct socket_info *si, int family)
3938 {
3939         struct swrap_address un_addr = {
3940                 .sa_socklen = sizeof(struct sockaddr_un),
3941         };
3942         int i;
3943         char type;
3944         int ret;
3945         int port;
3946         char *swrap_dir = NULL;
3947
3948         swrap_mutex_lock(&autobind_start_mutex);
3949
3950         if (autobind_start_init != 1) {
3951                 autobind_start_init = 1;
3952                 autobind_start = getpid();
3953                 autobind_start %= 50000;
3954                 autobind_start += 10000;
3955         }
3956
3957         un_addr.sa.un.sun_family = AF_UNIX;
3958
3959         switch (family) {
3960         case AF_INET: {
3961                 struct sockaddr_in in;
3962
3963                 switch (si->type) {
3964                 case SOCK_STREAM:
3965                         type = SOCKET_TYPE_CHAR_TCP;
3966                         break;
3967                 case SOCK_DGRAM:
3968                         type = SOCKET_TYPE_CHAR_UDP;
3969                         break;
3970                 default:
3971                         errno = ESOCKTNOSUPPORT;
3972                         ret = -1;
3973                         goto done;
3974                 }
3975
3976                 memset(&in, 0, sizeof(in));
3977                 in.sin_family = AF_INET;
3978                 in.sin_addr.s_addr = htonl(swrap_ipv4_iface(
3979                                            socket_wrapper_default_iface()));
3980
3981                 si->myname = (struct swrap_address) {
3982                         .sa_socklen = sizeof(in),
3983                 };
3984                 memcpy(&si->myname.sa.in, &in, si->myname.sa_socklen);
3985                 break;
3986         }
3987 #ifdef HAVE_IPV6
3988         case AF_INET6: {
3989                 struct sockaddr_in6 in6;
3990
3991                 if (si->family != family) {
3992                         errno = ENETUNREACH;
3993                         ret = -1;
3994                         goto done;
3995                 }
3996
3997                 switch (si->type) {
3998                 case SOCK_STREAM:
3999                         type = SOCKET_TYPE_CHAR_TCP_V6;
4000                         break;
4001                 case SOCK_DGRAM:
4002                         type = SOCKET_TYPE_CHAR_UDP_V6;
4003                         break;
4004                 default:
4005                         errno = ESOCKTNOSUPPORT;
4006                         ret = -1;
4007                         goto done;
4008                 }
4009
4010                 memset(&in6, 0, sizeof(in6));
4011                 in6.sin6_family = AF_INET6;
4012                 in6.sin6_addr = *swrap_ipv6();
4013                 in6.sin6_addr.s6_addr[15] = socket_wrapper_default_iface();
4014
4015                 si->myname = (struct swrap_address) {
4016                         .sa_socklen = sizeof(in6),
4017                 };
4018                 memcpy(&si->myname.sa.in6, &in6, si->myname.sa_socklen);
4019                 break;
4020         }
4021 #endif
4022         default:
4023                 errno = ESOCKTNOSUPPORT;
4024                 ret = -1;
4025                 goto done;
4026         }
4027
4028         if (autobind_start > 60000) {
4029                 autobind_start = 10000;
4030         }
4031
4032         swrap_dir = socket_wrapper_dir();
4033         if (swrap_dir == NULL) {
4034                 errno = EINVAL;
4035                 ret = -1;
4036                 goto done;
4037         }
4038
4039         for (i = 0; i < SOCKET_MAX_SOCKETS; i++) {
4040                 port = autobind_start + i;
4041                 swrap_un_path(&un_addr.sa.un,
4042                               swrap_dir,
4043                               type,
4044                               socket_wrapper_default_iface(),
4045                               port);
4046
4047                 ret = libc_bind(fd, &un_addr.sa.s, un_addr.sa_socklen);
4048                 if (ret == -1) {
4049                         if (errno == EALREADY || errno == EADDRINUSE) {
4050                                 continue;
4051                         }
4052                         goto done;
4053                 }
4054
4055                 si->un_addr = un_addr.sa.un;
4056
4057                 si->bound = 1;
4058                 autobind_start = port + 1;
4059                 break;
4060         }
4061         if (i == SOCKET_MAX_SOCKETS) {
4062                 SWRAP_LOG(SWRAP_LOG_ERROR, "Too many open unix sockets (%u) for "
4063                                            "interface "SOCKET_FORMAT,
4064                                            SOCKET_MAX_SOCKETS,
4065                                            type,
4066                                            socket_wrapper_default_iface(),
4067                                            0);
4068                 errno = ENFILE;
4069                 ret = -1;
4070                 goto done;
4071         }
4072
4073         si->family = family;
4074         set_port(si->family, port, &si->myname);
4075
4076         ret = 0;
4077
4078 done:
4079         SAFE_FREE(swrap_dir);
4080         swrap_mutex_unlock(&autobind_start_mutex);
4081         return ret;
4082 }
4083
4084 /****************************************************************************
4085  *   CONNECT
4086  ***************************************************************************/
4087
4088 static int swrap_connect(int s, const struct sockaddr *serv_addr,
4089                          socklen_t addrlen)
4090 {
4091         int ret;
4092         struct swrap_address un_addr = {
4093                 .sa_socklen = sizeof(struct sockaddr_un),
4094         };
4095         struct socket_info *si = find_socket_info(s);
4096         struct swrap_sockaddr_buf buf = {};
4097         int bcast = 0;
4098
4099         if (!si) {
4100                 return libc_connect(s, serv_addr, addrlen);
4101         }
4102
4103         SWRAP_LOCK_SI(si);
4104
4105         if (si->bound == 0) {
4106                 ret = swrap_auto_bind(s, si, serv_addr->sa_family);
4107                 if (ret == -1) {
4108                         goto done;
4109                 }
4110         }
4111
4112         if (si->family != serv_addr->sa_family) {
4113                 SWRAP_LOG(SWRAP_LOG_ERROR,
4114                           "called for fd=%d (family=%d) called with invalid family=%d",
4115                           s, si->family, serv_addr->sa_family);
4116                 errno = EINVAL;
4117                 ret = -1;
4118                 goto done;
4119         }
4120
4121         ret = sockaddr_convert_to_un(si, serv_addr,
4122                                      addrlen, &un_addr.sa.un, 0, &bcast);
4123         if (ret == -1) {
4124                 goto done;
4125         }
4126
4127         if (bcast) {
4128                 errno = ENETUNREACH;
4129                 ret = -1;
4130                 goto done;
4131         }
4132
4133         if (si->type == SOCK_DGRAM) {
4134                 si->defer_connect = 1;
4135                 ret = 0;
4136         } else {
4137                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_SEND, NULL, 0);
4138
4139                 ret = libc_connect(s,
4140                                    &un_addr.sa.s,
4141                                    un_addr.sa_socklen);
4142         }
4143
4144         SWRAP_LOG(SWRAP_LOG_TRACE,
4145                   "connect(%s) path=%s, fd=%d",
4146                   swrap_sockaddr_string(&buf, serv_addr),
4147                   un_addr.sa.un.sun_path, s);
4148
4149
4150         /* to give better errors */
4151         if (ret == -1 && errno == ENOENT) {
4152                 errno = EHOSTUNREACH;
4153         }
4154
4155         if (ret == 0) {
4156                 si->peername = (struct swrap_address) {
4157                         .sa_socklen = addrlen,
4158                 };
4159
4160                 memcpy(&si->peername.sa.ss, serv_addr, addrlen);
4161                 si->connected = 1;
4162
4163                 /*
4164                  * When we connect() on a socket than we have to bind the
4165                  * outgoing connection on the interface we use for the
4166                  * transport. We already bound it on the right interface
4167                  * but here we have to update the name so getsockname()
4168                  * returns correct information.
4169                  */
4170                 if (si->bindname.sa_socklen > 0) {
4171                         si->myname = (struct swrap_address) {
4172                                 .sa_socklen = si->bindname.sa_socklen,
4173                         };
4174
4175                         memcpy(&si->myname.sa.ss,
4176                                &si->bindname.sa.ss,
4177                                si->bindname.sa_socklen);
4178
4179                         /* Cleanup bindname */
4180                         si->bindname = (struct swrap_address) {
4181                                 .sa_socklen = 0,
4182                         };
4183                 }
4184
4185                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_RECV, NULL, 0);
4186                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_ACK, NULL, 0);
4187         } else {
4188                 swrap_pcap_dump_packet(si, serv_addr, SWRAP_CONNECT_UNREACH, NULL, 0);
4189         }
4190
4191 done:
4192         SWRAP_UNLOCK_SI(si);
4193         return ret;
4194 }
4195
/*
 * Public connect() entry point; forwards to swrap_connect().
 */
int connect(int s, const struct sockaddr *serv_addr, socklen_t addrlen)
{
	int rc = swrap_connect(s, serv_addr, addrlen);

	return rc;
}
4200
4201 /****************************************************************************
4202  *   BIND
4203  ***************************************************************************/
4204
4205 static int swrap_bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
4206 {
4207         int ret;
4208         struct swrap_address un_addr = {
4209                 .sa_socklen = sizeof(struct sockaddr_un),
4210         };
4211         struct socket_info *si = find_socket_info(s);
4212         struct swrap_sockaddr_buf buf = {};
4213         int ret_errno = errno;
4214         int bind_error = 0;
4215 #if 0 /* FIXME */
4216         bool in_use;
4217 #endif
4218
4219         if (!si) {
4220                 return libc_bind(s, myaddr, addrlen);
4221         }
4222
4223         SWRAP_LOCK_SI(si);
4224
4225         switch (si->family) {
4226         case AF_INET: {
4227                 const struct sockaddr_in *sin;
4228                 if (addrlen < sizeof(struct sockaddr_in)) {
4229                         bind_error = EINVAL;
4230                         break;
4231                 }
4232
4233                 sin = (const struct sockaddr_in *)(const void *)myaddr;
4234
4235                 if (sin->sin_family != AF_INET) {
4236                         bind_error = EAFNOSUPPORT;
4237                 }
4238
4239                 /* special case for AF_UNSPEC */
4240                 if (sin->sin_family == AF_UNSPEC &&
4241                     (sin->sin_addr.s_addr == htonl(INADDR_ANY)))
4242                 {
4243                         bind_error = 0;
4244                 }
4245
4246                 break;
4247         }
4248 #ifdef HAVE_IPV6
4249         case AF_INET6: {
4250                 const struct sockaddr_in6 *sin6;
4251                 if (addrlen < sizeof(struct sockaddr_in6)) {
4252                         bind_error = EINVAL;
4253                         break;
4254                 }
4255
4256                 sin6 = (const struct sockaddr_in6 *)(const void *)myaddr;
4257
4258                 if (sin6->sin6_family != AF_INET6) {
4259                         bind_error = EAFNOSUPPORT;
4260                 }
4261
4262                 break;
4263         }
4264 #endif
4265         default:
4266                 bind_error = EINVAL;
4267                 break;
4268         }
4269
4270         if (bind_error != 0) {
4271                 ret_errno = bind_error;
4272                 ret = -1;
4273                 goto out;
4274         }
4275
4276 #if 0 /* FIXME */
4277         in_use = check_addr_port_in_use(myaddr, addrlen);
4278         if (in_use) {
4279                 errno = EADDRINUSE;
4280                 ret = -1;
4281                 goto out;
4282         }
4283 #endif
4284
4285         si->myname.sa_socklen = addrlen;
4286         memcpy(&si->myname.sa.ss, myaddr, addrlen);
4287
4288         ret = sockaddr_convert_to_un(si,
4289                                      myaddr,
4290                                      addrlen,
4291                                      &un_addr.sa.un,
4292                                      1,
4293                                      &si->bcast);
4294         if (ret == -1) {
4295                 ret_errno = errno;
4296                 goto out;
4297         }
4298
4299         unlink(un_addr.sa.un.sun_path);
4300
4301         ret = libc_bind(s, &un_addr.sa.s, un_addr.sa_socklen);
4302         if (ret == -1) {
4303                 ret_errno = errno;
4304         }
4305
4306         SWRAP_LOG(SWRAP_LOG_TRACE,
4307                   "bind(%s) path=%s, fd=%d ret=%d ret_errno=%d",
4308                   swrap_sockaddr_string(&buf, myaddr),
4309                   un_addr.sa.un.sun_path, s, ret, ret_errno);
4310
4311         if (ret == 0) {
4312                 si->bound = 1;
4313         }
4314
4315 out:
4316         SWRAP_UNLOCK_SI(si);
4317         errno = ret_errno;
4318         return ret;
4319 }
4320
/*
 * Public bind() entry point; forwards to swrap_bind().
 */
int bind(int s, const struct sockaddr *myaddr, socklen_t addrlen)
{
	int rc = swrap_bind(s, myaddr, addrlen);

	return rc;
}
4325
4326 /****************************************************************************
4327  *   BINDRESVPORT
4328  ***************************************************************************/
4329
4330 #ifdef HAVE_BINDRESVPORT
4331 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen);
4332
4333 static int swrap_bindresvport_sa(int sd, struct sockaddr *sa)
4334 {
4335         struct swrap_address myaddr = {
4336                 .sa_socklen = sizeof(struct sockaddr_storage),
4337         };
4338         socklen_t salen;
4339         static uint16_t port;
4340         uint16_t i;
4341         int rc = -1;
4342         int af;
4343
4344 #define SWRAP_STARTPORT 600
4345 #define SWRAP_ENDPORT (IPPORT_RESERVED - 1)
4346 #define SWRAP_NPORTS (SWRAP_ENDPORT - SWRAP_STARTPORT + 1)
4347
4348         if (port == 0) {
4349                 port = (getpid() % SWRAP_NPORTS) + SWRAP_STARTPORT;
4350         }
4351
4352         if (sa == NULL) {
4353                 salen = myaddr.sa_socklen;
4354                 sa = &myaddr.sa.s;
4355
4356                 rc = swrap_getsockname(sd, &myaddr.sa.s, &salen);
4357                 if (rc < 0) {
4358                         return -1;
4359                 }
4360
4361                 af = sa->sa_family;
4362                 memset(&myaddr.sa.ss, 0, salen);
4363         } else {
4364                 af = sa->sa_family;
4365         }
4366
4367         for (i = 0; i < SWRAP_NPORTS; i++, port++) {
4368                 switch(af) {
4369                 case AF_INET: {
4370                         struct sockaddr_in *sinp = (struct sockaddr_in *)(void *)sa;
4371
4372                         salen = sizeof(struct sockaddr_in);
4373                         sinp->sin_port = htons(port);
4374                         break;
4375                 }
4376                 case AF_INET6: {
4377                         struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)(void *)sa;
4378
4379                         salen = sizeof(struct sockaddr_in6);
4380                         sin6p->sin6_port = htons(port);
4381                         break;
4382                 }
4383                 default:
4384                         errno = EAFNOSUPPORT;
4385                         return -1;
4386                 }
4387                 sa->sa_family = af;
4388
4389                 if (port > SWRAP_ENDPORT) {
4390                         port = SWRAP_STARTPORT;
4391                 }
4392
4393                 rc = swrap_bind(sd, (struct sockaddr *)sa, salen);
4394                 if (rc == 0 || errno != EADDRINUSE) {
4395                         break;
4396                 }
4397         }
4398
4399         return rc;
4400 }
4401
/*
 * Public bindresvport() entry point; forwards to swrap_bindresvport_sa()
 * with the sockaddr_in viewed as a generic sockaddr.
 */
int bindresvport(int sockfd, struct sockaddr_in *sinp)
{
	struct sockaddr *sa = (struct sockaddr *)sinp;

	return swrap_bindresvport_sa(sockfd, sa);
}
4406 #endif
4407
4408 /****************************************************************************
4409  *   LISTEN
4410  ***************************************************************************/
4411
4412 static int swrap_listen(int s, int backlog)
4413 {
4414         int ret;
4415         struct socket_info *si = find_socket_info(s);
4416
4417         if (!si) {
4418                 return libc_listen(s, backlog);
4419         }
4420
4421         SWRAP_LOCK_SI(si);
4422
4423         if (si->bound == 0) {
4424                 ret = swrap_auto_bind(s, si, si->family);
4425                 if (ret == -1) {
4426                         errno = EADDRINUSE;
4427                         goto out;
4428                 }
4429         }
4430
4431         ret = libc_listen(s, backlog);
4432         if (ret == 0) {
4433                 si->listening = 1;
4434         }
4435
4436 out:
4437         SWRAP_UNLOCK_SI(si);
4438
4439         return ret;
4440 }
4441
/*
 * Public listen() entry point; forwards to swrap_listen().
 */
int listen(int s, int backlog)
{
	int rc = swrap_listen(s, backlog);

	return rc;
}
4446
4447 /****************************************************************************
4448  *   FOPEN
4449  ***************************************************************************/
4450
/*
 * Open a stream through libc. The descriptor behind a successfully
 * opened stream is passed to swrap_remove_stale().
 */
static FILE *swrap_fopen(const char *name, const char *mode)
{
	FILE *stream = libc_fopen(name, mode);

	if (stream == NULL) {
		return NULL;
	}

	swrap_remove_stale(fileno(stream));

	return stream;
}
4464
/*
 * Public fopen() entry point; forwards to swrap_fopen().
 */
FILE *fopen(const char *name, const char *mode)
{
	FILE *stream = swrap_fopen(name, mode);

	return stream;
}
4469
4470 /****************************************************************************
4471  *   FOPEN64
4472  ***************************************************************************/
4473
4474 #ifdef HAVE_FOPEN64
/*
 * Open a stream through libc's fopen64(). The descriptor behind a
 * successfully opened stream is passed to swrap_remove_stale().
 */
static FILE *swrap_fopen64(const char *name, const char *mode)
{
	FILE *stream = libc_fopen64(name, mode);

	if (stream == NULL) {
		return NULL;
	}

	swrap_remove_stale(fileno(stream));

	return stream;
}
4488
/*
 * Public fopen64() entry point; forwards to swrap_fopen64().
 */
FILE *fopen64(const char *name, const char *mode)
{
	FILE *stream = swrap_fopen64(name, mode);

	return stream;
}
4493 #endif /* HAVE_FOPEN64 */
4494
4495 /****************************************************************************
4496  *   OPEN
4497  ***************************************************************************/
4498
/*
 * va_list backend of open(): opens the file through libc and passes a
 * successfully opened descriptor to swrap_remove_stale().
 */
static int swrap_vopen(const char *pathname, int flags, va_list ap)
{
	int fd = libc_vopen(pathname, flags, ap);

	if (fd == -1) {
		return fd;
	}

	/*
	 * Descriptors can be closed in ways we can't intercept
	 * (libc-internal code paths, direct syscalls), so try to
	 * recover when we notice that this has happened.
	 */
	swrap_remove_stale(fd);

	return fd;
}
4515
/*
 * Public open() entry point; hands its variable arguments to
 * swrap_vopen() as a va_list.
 */
int open(const char *pathname, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopen(pathname, flags, args);
	va_end(args);

	return result;
}
4527
4528 /****************************************************************************
4529  *   OPEN64
4530  ***************************************************************************/
4531
4532 #ifdef HAVE_OPEN64
/*
 * va_list backend of open64(): opens the file through libc and passes a
 * successfully opened descriptor to swrap_remove_stale().
 */
static int swrap_vopen64(const char *pathname, int flags, va_list ap)
{
	int fd = libc_vopen64(pathname, flags, ap);

	if (fd == -1) {
		return fd;
	}

	/*
	 * Descriptors can be closed in ways we can't intercept
	 * (libc-internal code paths, direct syscalls), so try to
	 * recover when we notice that this has happened.
	 */
	swrap_remove_stale(fd);

	return fd;
}
4549
/*
 * Public open64() entry point; hands its variable arguments to
 * swrap_vopen64() as a va_list.
 */
int open64(const char *pathname, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopen64(pathname, flags, args);
	va_end(args);

	return result;
}
4561 #endif /* HAVE_OPEN64 */
4562
4563 /****************************************************************************
4564  *   OPENAT
4565  ***************************************************************************/
4566
/*
 * va_list backend of openat(): opens the file through libc and passes a
 * successfully opened descriptor to swrap_remove_stale().
 */
static int swrap_vopenat(int dirfd, const char *path, int flags, va_list ap)
{
	int fd = libc_vopenat(dirfd, path, flags, ap);

	if (fd == -1) {
		return fd;
	}

	/*
	 * Descriptors can be closed in ways we can't intercept
	 * (libc-internal code paths, direct syscalls), so try to
	 * recover when we notice that this has happened.
	 */
	swrap_remove_stale(fd);

	return fd;
}
4584
/*
 * Public openat() entry point; hands its variable arguments to
 * swrap_vopenat() as a va_list.
 */
int openat(int dirfd, const char *path, int flags, ...)
{
	va_list args;
	int result;

	va_start(args, flags);
	result = swrap_vopenat(dirfd, path, flags, args);
	va_end(args);

	return result;
}
4596
4597 /****************************************************************************
4598  *   GETPEERNAME
4599  ***************************************************************************/
4600
4601 static int swrap_getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
4602 {
4603         struct socket_info *si = find_socket_info(s);
4604         socklen_t len;
4605         int ret = -1;
4606
4607         if (!si) {
4608                 return libc_getpeername(s, name, addrlen);
4609         }
4610
4611         SWRAP_LOCK_SI(si);
4612
4613         if (si->peername.sa_socklen == 0)
4614         {
4615                 errno = ENOTCONN;
4616                 goto out;
4617         }
4618
4619         len = MIN(*addrlen, si->peername.sa_socklen);
4620         if (len == 0) {
4621                 ret = 0;
4622                 goto out;
4623         }
4624
4625         memcpy(name, &si->peername.sa.ss, len);
4626         *addrlen = si->peername.sa_socklen;
4627
4628         ret = 0;
4629 out:
4630         SWRAP_UNLOCK_SI(si);
4631
4632         return ret;
4633 }
4634
/*
 * Exported getpeername() entry point; delegates to swrap_getpeername().
 * The type of the length parameter depends on HAVE_ACCEPT_PSOCKLEN_T.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getpeername(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getpeername(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)addrlen;

        return swrap_getpeername(s, name, len_ptr);
}
4643
4644 /****************************************************************************
4645  *   GETSOCKNAME
4646  ***************************************************************************/
4647
4648 static int swrap_getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
4649 {
4650         struct socket_info *si = find_socket_info(s);
4651         socklen_t len;
4652         int ret = -1;
4653
4654         if (!si) {
4655                 return libc_getsockname(s, name, addrlen);
4656         }
4657
4658         SWRAP_LOCK_SI(si);
4659
4660         len = MIN(*addrlen, si->myname.sa_socklen);
4661         if (len == 0) {
4662                 ret = 0;
4663                 goto out;
4664         }
4665
4666         memcpy(name, &si->myname.sa.ss, len);
4667         *addrlen = si->myname.sa_socklen;
4668
4669         ret = 0;
4670 out:
4671         SWRAP_UNLOCK_SI(si);
4672
4673         return ret;
4674 }
4675
/*
 * Exported getsockname() entry point; delegates to swrap_getsockname().
 * The type of the length parameter depends on HAVE_ACCEPT_PSOCKLEN_T.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockname(int s, struct sockaddr *name, Psocklen_t addrlen)
#else
int getsockname(int s, struct sockaddr *name, socklen_t *addrlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)addrlen;

        return swrap_getsockname(s, name, len_ptr);
}
4684
4685 /****************************************************************************
4686  *   GETSOCKOPT
4687  ***************************************************************************/
4688
4689 #ifndef SO_PROTOCOL
4690 # ifdef SO_PROTOTYPE /* The Solaris name */
4691 #  define SO_PROTOCOL SO_PROTOTYPE
4692 # endif /* SO_PROTOTYPE */
4693 #endif /* SO_PROTOCOL */
4694
4695 static int swrap_getsockopt(int s, int level, int optname,
4696                             void *optval, socklen_t *optlen)
4697 {
4698         struct socket_info *si = find_socket_info(s);
4699         int ret;
4700
4701         if (!si) {
4702                 return libc_getsockopt(s,
4703                                        level,
4704                                        optname,
4705                                        optval,
4706                                        optlen);
4707         }
4708
4709         SWRAP_LOCK_SI(si);
4710
4711         if (level == SOL_SOCKET) {
4712                 switch (optname) {
4713 #ifdef SO_DOMAIN
4714                 case SO_DOMAIN:
4715                         if (optval == NULL || optlen == NULL ||
4716                             *optlen < (socklen_t)sizeof(int)) {
4717                                 errno = EINVAL;
4718                                 ret = -1;
4719                                 goto done;
4720                         }
4721
4722                         *optlen = sizeof(int);
4723                         *(int *)optval = si->family;
4724                         ret = 0;
4725                         goto done;
4726 #endif /* SO_DOMAIN */
4727
4728 #ifdef SO_PROTOCOL
4729                 case SO_PROTOCOL:
4730                         if (optval == NULL || optlen == NULL ||
4731                             *optlen < (socklen_t)sizeof(int)) {
4732                                 errno = EINVAL;
4733                                 ret = -1;
4734                                 goto done;
4735                         }
4736
4737                         *optlen = sizeof(int);
4738                         *(int *)optval = si->protocol;
4739                         ret = 0;
4740                         goto done;
4741 #endif /* SO_PROTOCOL */
4742                 case SO_TYPE:
4743                         if (optval == NULL || optlen == NULL ||
4744                             *optlen < (socklen_t)sizeof(int)) {
4745                                 errno = EINVAL;
4746                                 ret = -1;
4747                                 goto done;
4748                         }
4749
4750                         *optlen = sizeof(int);
4751                         *(int *)optval = si->type;
4752                         ret = 0;
4753                         goto done;
4754                 default:
4755                         ret = libc_getsockopt(s,
4756                                               level,
4757                                               optname,
4758                                               optval,
4759                                               optlen);
4760                         goto done;
4761                 }
4762         } else if (level == IPPROTO_TCP) {
4763                 switch (optname) {
4764 #ifdef TCP_NODELAY
4765                 case TCP_NODELAY:
4766                         /*
4767                          * This enables sending packets directly out over TCP.
4768                          * As a unix socket is doing that any way, report it as
4769                          * enabled.
4770                          */
4771                         if (optval == NULL || optlen == NULL ||
4772                             *optlen < (socklen_t)sizeof(int)) {
4773                                 errno = EINVAL;
4774                                 ret = -1;
4775                                 goto done;
4776                         }
4777
4778                         *optlen = sizeof(int);
4779                         *(int *)optval = si->tcp_nodelay;
4780
4781                         ret = 0;
4782                         goto done;
4783 #endif /* TCP_NODELAY */
4784 #ifdef TCP_INFO
4785                 case TCP_INFO: {
4786                         struct tcp_info info;
4787                         socklen_t ilen = sizeof(info);
4788
4789 #ifdef HAVE_NETINET_TCP_FSM_H
4790 /* This is FreeBSD */
4791 # define __TCP_LISTEN TCPS_LISTEN
4792 # define __TCP_ESTABLISHED TCPS_ESTABLISHED
4793 # define __TCP_CLOSE TCPS_CLOSED
4794 #else
4795 /* This is Linux */
4796 # define __TCP_LISTEN TCP_LISTEN
4797 # define __TCP_ESTABLISHED TCP_ESTABLISHED
4798 # define __TCP_CLOSE TCP_CLOSE
4799 #endif
4800
4801                         ZERO_STRUCT(info);
4802                         if (si->listening) {
4803                                 info.tcpi_state = __TCP_LISTEN;
4804                         } else if (si->connected) {
4805                                 /*
4806                                  * For now we just fake a few values
4807                                  * supported both by FreeBSD and Linux
4808                                  */
4809                                 info.tcpi_state = __TCP_ESTABLISHED;
4810                                 info.tcpi_rto = 200000;  /* 200 msec */
4811                                 info.tcpi_rtt = 5000;    /* 5 msec */
4812                                 info.tcpi_rttvar = 5000; /* 5 msec */
4813                         } else {
4814                                 info.tcpi_state = __TCP_CLOSE;
4815                                 info.tcpi_rto = 1000000;  /* 1 sec */
4816                                 info.tcpi_rtt = 0;
4817                                 info.tcpi_rttvar = 250000; /* 250 msec */
4818                         }
4819
4820                         if (optval == NULL || optlen == NULL ||
4821                             *optlen < (socklen_t)ilen) {
4822                                 errno = EINVAL;
4823                                 ret = -1;
4824                                 goto done;
4825                         }
4826
4827                         *optlen = ilen;
4828                         memcpy(optval, &info, ilen);
4829
4830                         ret = 0;
4831                         goto done;
4832                 }
4833 #endif /* TCP_INFO */
4834                 default:
4835                         break;
4836                 }
4837         }
4838
4839         errno = ENOPROTOOPT;
4840         ret = -1;
4841
4842 done:
4843         SWRAP_UNLOCK_SI(si);
4844         return ret;
4845 }
4846
/*
 * Exported getsockopt() entry point; delegates to swrap_getsockopt().
 * The type of the length parameter depends on HAVE_ACCEPT_PSOCKLEN_T.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
int getsockopt(int s, int level, int optname, void *optval, Psocklen_t optlen)
#else
int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen)
#endif
{
        socklen_t *len_ptr = (socklen_t *)optlen;

        return swrap_getsockopt(s, level, optname, optval, len_ptr);
}
4855
4856 /****************************************************************************
4857  *   SETSOCKOPT
4858  ***************************************************************************/
4859
4860 static int swrap_setsockopt(int s, int level, int optname,
4861                             const void *optval, socklen_t optlen)
4862 {
4863         struct socket_info *si = find_socket_info(s);
4864         int ret;
4865
4866         if (!si) {
4867                 return libc_setsockopt(s,
4868                                        level,
4869                                        optname,
4870                                        optval,
4871                                        optlen);
4872         }
4873
4874         if (level == SOL_SOCKET) {
4875                 return libc_setsockopt(s,
4876                                        level,
4877                                        optname,
4878                                        optval,
4879                                        optlen);
4880         }
4881
4882         SWRAP_LOCK_SI(si);
4883
4884         if (level == IPPROTO_TCP) {
4885                 switch (optname) {
4886 #ifdef TCP_NODELAY
4887                 case TCP_NODELAY: {
4888                         int i;
4889
4890                         /*
4891                          * This enables sending packets directly out over TCP.
4892                          * A unix socket is doing that any way.
4893                          */
4894                         if (optval == NULL || optlen == 0 ||
4895                             optlen < (socklen_t)sizeof(int)) {
4896                                 errno = EINVAL;
4897                                 ret = -1;
4898                                 goto done;
4899                         }
4900
4901                         i = *discard_const_p(int, optval);
4902                         if (i != 0 && i != 1) {
4903                                 errno = EINVAL;
4904                                 ret = -1;
4905                                 goto done;
4906                         }
4907                         si->tcp_nodelay = i;
4908
4909                         ret = 0;
4910                         goto done;
4911                 }
4912 #endif /* TCP_NODELAY */
4913                 default:
4914                         break;
4915                 }
4916         }
4917
4918         switch (si->family) {
4919         case AF_INET:
4920                 if (level == IPPROTO_IP) {
4921 #ifdef IP_PKTINFO
4922                         if (optname == IP_PKTINFO) {
4923                                 si->pktinfo = AF_INET;
4924                         }
4925 #endif /* IP_PKTINFO */
4926                 }
4927                 ret = 0;
4928                 goto done;
4929 #ifdef HAVE_IPV6
4930         case AF_INET6:
4931                 if (level == IPPROTO_IPV6) {
4932 #ifdef IPV6_RECVPKTINFO
4933                         if (optname == IPV6_RECVPKTINFO) {
4934                                 si->pktinfo = AF_INET6;
4935                         }
4936 #endif /* IPV6_PKTINFO */
4937                 }
4938                 ret = 0;
4939                 goto done;
4940 #endif
4941         default:
4942                 errno = ENOPROTOOPT;
4943                 ret = -1;
4944                 goto done;
4945         }
4946
4947 done:
4948         SWRAP_UNLOCK_SI(si);
4949         return ret;
4950 }
4951
/* Exported setsockopt() entry point; delegates to swrap_setsockopt(). */
int setsockopt(int s, int level, int optname,
               const void *optval, socklen_t optlen)
{
        int rc = swrap_setsockopt(s, level, optname, optval, optlen);

        return rc;
}
4957
4958 /****************************************************************************
4959  *   IOCTL
4960  ***************************************************************************/
4961
4962 static int swrap_vioctl(int s, unsigned long int r, va_list va)
4963 {
4964         struct socket_info *si = find_socket_info(s);
4965         va_list ap;
4966         int *value_ptr = NULL;
4967         int rc;
4968
4969         if (!si) {
4970                 return libc_vioctl(s, r, va);
4971         }
4972
4973         SWRAP_LOCK_SI(si);
4974
4975         va_copy(ap, va);
4976
4977         rc = libc_vioctl(s, r, va);
4978
4979         switch (r) {
4980         case FIONREAD:
4981                 if (rc == 0) {
4982                         value_ptr = ((int *)va_arg(ap, int *));
4983                 }
4984
4985                 if (rc == -1 && errno != EAGAIN && errno != ENOBUFS) {
4986                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4987                 } else if (value_ptr != NULL && *value_ptr == 0) { /* END OF FILE */
4988                         swrap_pcap_dump_packet(si, NULL, SWRAP_PENDING_RST, NULL, 0);
4989                 }
4990                 break;
4991 #ifdef FIONWRITE
4992         case FIONWRITE:
4993                 /* this is FreeBSD */
4994                 FALL_THROUGH; /* to TIOCOUTQ */
4995 #endif /* FIONWRITE */
4996         case TIOCOUTQ: /* same as SIOCOUTQ on Linux */
4997                 /*
4998                  * This may return more bytes then the application
4999                  * sent into the socket, for tcp it should
5000                  * return the number of unacked bytes.
5001                  *
5002                  * On AF_UNIX, all bytes are immediately acked!
5003                  */
5004                 if (rc == 0) {
5005                         value_ptr = ((int *)va_arg(ap, int *));
5006                         *value_ptr = 0;
5007                 }
5008                 break;
5009         }
5010
5011         va_end(ap);
5012
5013         SWRAP_UNLOCK_SI(si);
5014         return rc;
5015 }
5016
/*
 * Exported ioctl() entry point. The request type depends on
 * HAVE_IOCTL_INT; it is widened to unsigned long before being passed on
 * to swrap_vioctl().
 */
#ifdef HAVE_IOCTL_INT
int ioctl(int s, int r, ...)
#else
int ioctl(int s, unsigned long int r, ...)
#endif
{
        va_list args;
        int result;

        va_start(args, r);
        result = swrap_vioctl(s, (unsigned long int)r, args);
        va_end(args);

        return result;
}
5034
5035 /*****************
5036  * CMSG
5037  *****************/
5038
5039 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
5040
5041 #ifndef CMSG_ALIGN
5042 # ifdef _ALIGN /* BSD */
5043 #define CMSG_ALIGN _ALIGN
5044 # else
5045 #define CMSG_ALIGN(len) (((len) + sizeof(size_t) - 1) & ~(sizeof(size_t) - 1))
5046 # endif /* _ALIGN */
5047 #endif /* CMSG_ALIGN */
5048
/**
 * @brief Append one control message to the control buffer of a msghdr.
 *
 * The cmsg is written at msg->msg_control; afterwards msg->msg_control and
 * msg->msg_controllen are moved past the element that was just added.
 * Because the msghdr is modified this way, call it on an intermediate
 * msghdr and not on the one handed in by the client.
 *
 * If the remaining control space is too small the cmsg is truncated to
 * what fits and MSG_CTRUNC is set in msg->msg_flags.
 *
 * @param[in,out] msg   The msghdr to which to add the cmsg.
 *
 * @param[in]  level    The cmsg level to set.
 *
 * @param[in]  type     The cmsg type to set.
 *
 * @param[in]  data     The cmsg data to set.
 *
 * @param[in]  len      The length of the data to set.
 */
static void swrap_msghdr_add_cmsghdr(struct msghdr *msg,
                                     int level,
                                     int type,
                                     const void *data,
                                     size_t len)
{
        const size_t full_len = CMSG_LEN(len);
        const size_t full_space = CMSG_SPACE(len);
        size_t copy_len = full_len;
        size_t advance = full_space;
        uint8_t scratch[full_space];
        struct cmsghdr *hdr = (struct cmsghdr *)(void *)scratch;
        uint8_t *dst = (uint8_t *)msg->msg_control;

        /* Clamp both sizes to what is left in the caller's buffer */
        if (msg->msg_controllen < full_len) {
                copy_len = msg->msg_controllen;
                msg->msg_flags |= MSG_CTRUNC;
        }
        if (msg->msg_controllen < full_space) {
                advance = msg->msg_controllen;
        }

        /*
         * Assemble the complete, untruncated cmsg in a zeroed scratch
         * buffer first; truncation then is just a shorter copy.
         */
        memset(scratch, 0, full_space);
        hdr->cmsg_len = copy_len;
        hdr->cmsg_level = level;
        hdr->cmsg_type = type;
        memcpy(CMSG_DATA(hdr), data, len);

        /*
         * copy_len bytes are written but advance bytes are consumed, so
         * trailing padding in the destination is left untouched.
         */
        memcpy(dst, hdr, copy_len);
        msg->msg_control = dst + advance;
        msg->msg_controllen -= advance;
}
5114
5115 static int swrap_msghdr_add_pktinfo(struct socket_info *si,
5116                                     struct msghdr *msg)
5117 {
5118         /* Add packet info */
5119         switch (si->pktinfo) {
5120 #if defined(IP_PKTINFO) && (defined(HAVE_STRUCT_IN_PKTINFO) || defined(IP_RECVDSTADDR))
5121         case AF_INET: {
5122                 struct sockaddr_in *sin;
5123 #if defined(HAVE_STRUCT_IN_PKTINFO)
5124                 struct in_pktinfo pkt;
5125 #elif defined(IP_RECVDSTADDR)
5126                 struct in_addr pkt;
5127 #endif
5128
5129                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in)) {
5130                         sin = &si->bindname.sa.in;
5131                 } else {
5132                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in)) {
5133                                 return 0;
5134                         }
5135                         sin = &si->myname.sa.in;
5136                 }
5137
5138                 ZERO_STRUCT(pkt);
5139
5140 #if defined(HAVE_STRUCT_IN_PKTINFO)
5141                 pkt.ipi_ifindex = socket_wrapper_default_iface();
5142                 pkt.ipi_addr.s_addr = sin->sin_addr.s_addr;
5143 #elif defined(IP_RECVDSTADDR)
5144                 pkt = sin->sin_addr;
5145 #endif
5146
5147                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IP, IP_PKTINFO,
5148                                          &pkt, sizeof(pkt));
5149
5150                 break;
5151         }
5152 #endif /* IP_PKTINFO */
5153 #if defined(HAVE_IPV6)
5154         case AF_INET6: {
5155 #if defined(IPV6_PKTINFO) && defined(HAVE_STRUCT_IN6_PKTINFO)
5156                 struct sockaddr_in6 *sin6;
5157                 struct in6_pktinfo pkt6;
5158
5159                 if (si->bindname.sa_socklen == sizeof(struct sockaddr_in6)) {
5160                         sin6 = &si->bindname.sa.in6;
5161                 } else {
5162                         if (si->myname.sa_socklen != sizeof(struct sockaddr_in6)) {
5163                                 return 0;
5164                         }
5165                         sin6 = &si->myname.sa.in6;
5166                 }
5167
5168                 ZERO_STRUCT(pkt6);
5169
5170                 pkt6.ipi6_ifindex = socket_wrapper_default_iface();
5171                 pkt6.ipi6_addr = sin6->sin6_addr;
5172
5173                 swrap_msghdr_add_cmsghdr(msg, IPPROTO_IPV6, IPV6_PKTINFO,
5174                                         &pkt6, sizeof(pkt6));
5175 #endif /* HAVE_STRUCT_IN6_PKTINFO */
5176
5177                 break;
5178         }
5179 #endif /* IPV6_PKTINFO */
5180         default:
5181                 return -1;
5182         }
5183
5184         return 0;
5185 }
5186
5187 static int swrap_msghdr_add_socket_info(struct socket_info *si,
5188                                         struct msghdr *omsg)
5189 {
5190         int rc = 0;
5191
5192         if (si->pktinfo > 0) {
5193                 rc = swrap_msghdr_add_pktinfo(si, omsg);
5194         }
5195
5196         return rc;
5197 }
5198
5199 static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
5200                                    uint8_t **cm_data,
5201                                    size_t *cm_data_space);
5202 static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
5203                                                 uint8_t **cm_data,
5204                                                 size_t *cm_data_space);
5205 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5206                                                 uint8_t **cm_data,
5207                                                 size_t *cm_data_space);
5208
5209 static int swrap_sendmsg_filter_cmsghdr(const struct msghdr *_msg,
5210                                         uint8_t **cm_data,
5211                                         size_t *cm_data_space)
5212 {
5213         struct msghdr *msg = discard_const_p(struct msghdr, _msg);
5214         struct cmsghdr *cmsg;
5215         int rc = -1;
5216
5217         /* Nothing to do */
5218         if (msg->msg_controllen == 0 || msg->msg_control == NULL) {
5219                 return 0;
5220         }
5221
5222         for (cmsg = CMSG_FIRSTHDR(msg);
5223              cmsg != NULL;
5224              cmsg = CMSG_NXTHDR(msg, cmsg)) {
5225                 switch (cmsg->cmsg_level) {
5226                 case IPPROTO_IP:
5227                         rc = swrap_sendmsg_filter_cmsg_ipproto_ip(cmsg,
5228                                                                   cm_data,
5229                                                                   cm_data_space);
5230                         break;
5231                 case SOL_SOCKET:
5232                         rc = swrap_sendmsg_filter_cmsg_sol_socket(cmsg,
5233                                                                   cm_data,
5234                                                                   cm_data_space);
5235                         break;
5236                 default:
5237                         rc = swrap_sendmsg_copy_cmsg(cmsg,
5238                                                      cm_data,
5239                                                      cm_data_space);
5240                         break;
5241                 }
5242                 if (rc < 0) {
5243                         int saved_errno = errno;
5244                         SAFE_FREE(*cm_data);
5245                         *cm_data_space = 0;
5246                         errno = saved_errno;
5247                         return rc;
5248                 }
5249         }
5250
5251         return rc;
5252 }
5253
/*
 * Append a copy of one control message to the growing buffer *cm_data.
 *
 * The buffer is enlarged with realloc() by CMSG_ALIGN(cmsg->cmsg_len)
 * bytes. cmsg->cmsg_len bytes are copied from cmsg and the alignment
 * padding that follows them is zeroed, so the buffer never contains
 * uninitialised bytes.
 *
 * @param[in]     cmsg           The control message to copy.
 * @param[in,out] cm_data        The buffer; may point to NULL initially.
 *                               Left untouched if realloc() fails.
 * @param[in,out] cm_data_space  Bytes used in *cm_data; increased by the
 *                               aligned length on success.
 *
 * @return 0 on success, -1 if realloc() failed.
 */
static int swrap_sendmsg_copy_cmsg(const struct cmsghdr *cmsg,
                                   uint8_t **cm_data,
                                   size_t *cm_data_space)
{
        size_t payload_len = (size_t)cmsg->cmsg_len;
        size_t aligned_len = CMSG_ALIGN(payload_len);
        size_t cmspace = *cm_data_space + aligned_len;
        uint8_t *p;

        p = realloc((*cm_data), cmspace);
        if (p == NULL) {
                return -1;
        }
        (*cm_data) = p;

        p = (*cm_data) + (*cm_data_space);
        *cm_data_space = cmspace;

        memcpy(p, cmsg, payload_len);

        /* Clear the alignment padding behind the copied message */
        if (aligned_len > payload_len) {
                memset(p + payload_len, 0, aligned_len - payload_len);
        }

        return 0;
}
5276
5277 static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
5278                                             uint8_t **cm_data,
5279                                             size_t *cm_data_space);
5280
5281
/*
 * Filter for control messages at level IPPROTO_IP.
 *
 * IP_PKTINFO and IPV6_PKTINFO (where defined) are passed to
 * swrap_sendmsg_filter_cmsg_pktinfo(); every other type yields -1.
 */
static int swrap_sendmsg_filter_cmsg_ipproto_ip(const struct cmsghdr *cmsg,
                                                uint8_t **cm_data,
                                                size_t *cm_data_space)
{
        int is_pktinfo = 0;

#ifdef IP_PKTINFO
        if (cmsg->cmsg_type == IP_PKTINFO) {
                is_pktinfo = 1;
        }
#endif
#ifdef IPV6_PKTINFO
        if (cmsg->cmsg_type == IPV6_PKTINFO) {
                is_pktinfo = 1;
        }
#endif

        if (!is_pktinfo) {
                return -1;
        }

        return swrap_sendmsg_filter_cmsg_pktinfo(cmsg,
                                                 cm_data,
                                                 cm_data_space);
}
5309
/*
 * Filter for packet-info control messages: the cmsg is dropped.
 *
 * Passing a IP pktinfo to a unix socket might be rejected by the
 * Kernel, at least on FreeBSD, so nothing is copied to the output
 * buffer and success is reported.
 */
static int swrap_sendmsg_filter_cmsg_pktinfo(const struct cmsghdr *cmsg,
                                             uint8_t **cm_data,
                                             size_t *cm_data_space)
{
        /* None of the arguments is needed to skip the message */
        (void)cm_data_space;
        (void)cm_data;
        (void)cmsg;

        return 0;
}
5324
5325 static int swrap_sendmsg_filter_cmsg_sol_socket(const struct cmsghdr *cmsg,
5326                                                 uint8_t **cm_data,
5327                                                 size_t *cm_data_space)
5328 {
5329         int rc = -1;
5330
5331         switch (cmsg->cmsg_type) {
5332         case SCM_RIGHTS:
5333                 SWRAP_LOG(SWRAP_LOG_TRACE,
5334                           "Ignoring SCM_RIGHTS on inet socket!");
5335                 rc = 0;
5336                 break;
5337 #ifdef SCM_CREDENTIALS
5338         case SCM_CREDENTIALS:
5339                 SWRAP_LOG(SWRAP_LOG_TRACE,
5340                           "Ignoring SCM_CREDENTIALS on inet socket!");
5341                 rc = 0;
5342                 break;
5343 #endif /* SCM_CREDENTIALS */
5344         default:
5345                 rc = swrap_sendmsg_copy_cmsg(cmsg,
5346                                              cm_data,
5347                                              cm_data_space);
5348                 break;
5349         }
5350
5351         return rc;
5352 }
5353
5354 static const uint64_t swrap_unix_scm_right_magic = 0x8e0e13f27c42fc36;
5355
5356 /*
5357  * We only allow up to 6 fds at a time
5358  * as that's more than enough for Samba
5359  * and it means we can keep the logic simple
5360  * and work with fixed size arrays.
5361  *
5362  * We also keep sizeof(struct swrap_unix_scm_rights)
5363  * under PIPE_BUF (4096) in order to allow a non-blocking
5364  * write into the pipe.
5365  */
5366 #ifndef PIPE_BUF
5367 #define PIPE_BUF 4096
5368 #endif
5369 #define SWRAP_MAX_PASSED_FDS ((size_t)6)
5370 #define SWRAP_MAX_PASSED_SOCKET_INFO SWRAP_MAX_PASSED_FDS
/*
 * Payload describing the descriptors passed with one SCM_RIGHTS message.
 * Both arrays are fixed size, bounded by SWRAP_MAX_PASSED_FDS and
 * SWRAP_MAX_PASSED_SOCKET_INFO.
 * NOTE(review): presumably num_idxs counts the used idxs[] entries and
 * each entry selects a slot in infos[] -- confirm against the
 * sender/receiver code.
 */
5371 struct swrap_unix_scm_rights_payload {
5372         uint8_t num_idxs;
5373         int8_t idxs[SWRAP_MAX_PASSED_FDS];
5374         struct socket_info infos[SWRAP_MAX_PASSED_SOCKET_INFO];
5375 };
/*
 * Envelope around the payload. swrap_sendmsg_unix_scm_rights() fills it
 * with swrap_unix_scm_right_magic, the SOCKET_WRAPPER_PACKAGE and
 * SOCKET_WRAPPER_VERSION strings, and with sizeof() of the whole struct
 * (full_size) and of the payload (payload_size).
 */
5376 struct swrap_unix_scm_rights {
5377         uint64_t magic;
5378         char package_name[sizeof(SOCKET_WRAPPER_PACKAGE)];
5379         char package_version[sizeof(SOCKET_WRAPPER_VERSION)];
5380         uint32_t full_size;
5381         uint32_t payload_size;
5382         struct swrap_unix_scm_rights_payload payload;
5383 };
5384
5385 static void swrap_dec_fd_passed_array(size_t num, struct socket_info **array)
5386 {
5387         int saved_errno = errno;
5388         size_t i;
5389
5390         for (i = 0; i < num; i++) {
5391                 struct socket_info *si = array[i];
5392                 if (si == NULL) {
5393                         continue;
5394                 }
5395
5396                 SWRAP_LOCK_SI(si);
5397                 swrap_dec_refcount(si);
5398                 if (si->fd_passed > 0) {
5399                         si->fd_passed -= 1;
5400                 }
5401                 SWRAP_UNLOCK_SI(si);
5402                 array[i] = NULL;
5403         }
5404
5405         errno = saved_errno;
5406 }
5407
5408 static void swrap_undo_si_idx_array(size_t num, int *array)
5409 {
5410         int saved_errno = errno;
5411         size_t i;
5412
5413         swrap_mutex_lock(&first_free_mutex);
5414
5415         for (i = 0; i < num; i++) {
5416                 struct socket_info *si = NULL;
5417
5418                 if (array[i] == -1) {
5419                         continue;
5420                 }
5421
5422                 si = swrap_get_socket_info(array[i]);
5423                 if (si == NULL) {
5424                         continue;
5425                 }
5426
5427                 SWRAP_LOCK_SI(si);
5428                 swrap_dec_refcount(si);
5429                 SWRAP_UNLOCK_SI(si);
5430
5431                 swrap_set_next_free(si, first_free);
5432                 first_free = array[i];
5433                 array[i] = -1;
5434         }
5435
5436         swrap_mutex_unlock(&first_free_mutex);
5437         errno = saved_errno;
5438 }
5439
/*
 * Close every descriptor in the array through libc_close(), skipping
 * entries that are -1. errno is left as it was on entry.
 */
static void swrap_close_fd_array(size_t num, const int *array)
{
        const int errno_on_entry = errno;
        size_t pos;

        for (pos = 0; pos < num; pos++) {
                if (array[pos] != -1) {
                        libc_close(array[pos]);
                }
        }

        errno = errno_on_entry;
}
5454
/*
 * Holds one pointer that can be read either as a const byte pointer (p)
 * or as an int pointer (fds).
 * NOTE(review): presumably used to reinterpret cmsg payload bytes as an
 * fd array without a cast -- confirm at the use sites.
 */
5455 union __swrap_fds {
5456         const uint8_t *p;
5457         int *fds;
5458 };
5459
/*
 * Holds one pointer that can be read either as a const byte pointer (p)
 * or as a struct cmsghdr pointer (cmsg).
 * NOTE(review): presumably used to address a cmsghdr inside a raw byte
 * buffer without a cast -- confirm at the use sites.
 */
5460 union __swrap_cmsghdr {
5461         const uint8_t *p;
5462         struct cmsghdr *cmsg;
5463 };
5464
5465 static int swrap_sendmsg_unix_scm_rights(struct cmsghdr *cmsg,
5466                                          uint8_t **cm_data,
5467                                          size_t *cm_data_space,
5468                                          int *scm_rights_pipe_fd)
5469 {
5470         struct swrap_unix_scm_rights info;
5471         struct swrap_unix_scm_rights_payload *payload = NULL;
5472         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5473         struct socket_info *si_array[SWRAP_MAX_PASSED_FDS] = { NULL, };
5474         size_t info_idx = 0;
5475         size_t size_fds_in;
5476         size_t num_fds_in;
5477         union __swrap_fds __fds_in = { .p = NULL, };
5478         const int *fds_in = NULL;
5479         size_t num_fds_out;
5480         size_t size_fds_out;
5481         union __swrap_fds __fds_out = { .p = NULL, };
5482         int *fds_out = NULL;
5483         size_t cmsg_len;
5484         size_t cmsg_space;
5485         size_t new_cm_data_space;
5486         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5487         struct cmsghdr *new_cmsg = NULL;
5488         uint8_t *p = NULL;
5489         size_t i;
5490         int pipefd[2] = { -1, -1 };
5491         int rc;
5492         ssize_t sret;
5493
5494         /*
5495          * We pass this a buffer to the kernel make sure any padding
5496          * is also cleared.
5497          */
5498         ZERO_STRUCT(info);
5499         info.magic = swrap_unix_scm_right_magic;
5500         memcpy(info.package_name,
5501                SOCKET_WRAPPER_PACKAGE,
5502                sizeof(info.package_name));
5503         memcpy(info.package_version,
5504                SOCKET_WRAPPER_VERSION,
5505                sizeof(info.package_version));
5506         info.full_size = sizeof(info);
5507         info.payload_size = sizeof(info.payload);
5508         payload = &info.payload;
5509
5510         if (*scm_rights_pipe_fd != -1) {
5511                 SWRAP_LOG(SWRAP_LOG_ERROR,
5512                           "Two SCM_RIGHTS headers are not supported by socket_wrapper");
5513                 errno = EINVAL;
5514                 return -1;
5515         }
5516
5517         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5518                 SWRAP_LOG(SWRAP_LOG_ERROR,
5519                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5520                           (size_t)cmsg->cmsg_len,
5521                           CMSG_LEN(0));
5522                 errno = EINVAL;
5523                 return -1;
5524         }
5525         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5526         if ((size_fds_in % sizeof(int)) != 0) {
5527                 SWRAP_LOG(SWRAP_LOG_ERROR,
5528                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5529                           (size_t)cmsg->cmsg_len,
5530                           size_fds_in,
5531                           sizeof(int));
5532                 errno = EINVAL;
5533                 return -1;
5534         }
5535         num_fds_in = size_fds_in / sizeof(int);
5536         if (num_fds_in > SWRAP_MAX_PASSED_FDS) {
5537                 SWRAP_LOG(SWRAP_LOG_ERROR,
5538                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5539                           "num_fds_in=%zu > "
5540                           "SWRAP_MAX_PASSED_FDS(%zu)",
5541                           (size_t)cmsg->cmsg_len,
5542                           size_fds_in,
5543                           num_fds_in,
5544                           SWRAP_MAX_PASSED_FDS);
5545                 errno = EINVAL;
5546                 return -1;
5547         }
5548         if (num_fds_in == 0) {
5549                 SWRAP_LOG(SWRAP_LOG_ERROR,
5550                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5551                           "num_fds_in=%zu",
5552                           (size_t)cmsg->cmsg_len,
5553                           size_fds_in,
5554                           num_fds_in);
5555                 errno = EINVAL;
5556                 return -1;
5557         }
5558         __fds_in.p = CMSG_DATA(cmsg);
5559         fds_in = __fds_in.fds;
5560         num_fds_out = num_fds_in + 1;
5561
5562         SWRAP_LOG(SWRAP_LOG_TRACE,
5563                   "num_fds_in=%zu num_fds_out=%zu",
5564                   num_fds_in, num_fds_out);
5565
5566         size_fds_out = sizeof(int) * num_fds_out;
5567         cmsg_len = CMSG_LEN(size_fds_out);
5568         cmsg_space = CMSG_SPACE(size_fds_out);
5569
5570         new_cm_data_space = *cm_data_space + cmsg_space;
5571
5572         p = realloc((*cm_data), new_cm_data_space);
5573         if (p == NULL) {
5574                 return -1;
5575         }
5576         (*cm_data) = p;
5577         p = (*cm_data) + (*cm_data_space);
5578         memset(p, 0, cmsg_space);
5579         __new_cmsg.p = p;
5580         new_cmsg = __new_cmsg.cmsg;
5581         *new_cmsg = *cmsg;
5582         __fds_out.p = CMSG_DATA(new_cmsg);
5583         fds_out = __fds_out.fds;
5584         memcpy(fds_out, fds_in, size_fds_in);
5585         new_cmsg->cmsg_len = cmsg->cmsg_len;
5586
5587         for (i = 0; i < num_fds_in; i++) {
5588                 size_t j;
5589
5590                 payload->idxs[i] = -1;
5591                 payload->num_idxs++;
5592
5593                 si_idx_array[i] = find_socket_info_index(fds_in[i]);
5594                 if (si_idx_array[i] == -1) {
5595                         continue;
5596                 }
5597
5598                 si_array[i] = swrap_get_socket_info(si_idx_array[i]);
5599                 if (si_array[i] == NULL) {
5600                         SWRAP_LOG(SWRAP_LOG_ERROR,
5601                                   "fds_in[%zu]=%d si_idx_array[%zu]=%d missing!",
5602                                   i, fds_in[i], i, si_idx_array[i]);
5603                         errno = EINVAL;
5604                         return -1;
5605                 }
5606
5607                 for (j = 0; j < i; j++) {
5608                         if (si_array[j] == si_array[i]) {
5609                                 payload->idxs[i] = payload->idxs[j];
5610                                 break;
5611                         }
5612                 }
5613                 if (payload->idxs[i] == -1) {
5614                         if (info_idx >= SWRAP_MAX_PASSED_SOCKET_INFO) {
5615                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5616                                           "fds_in[%zu]=%d,si_idx_array[%zu]=%d: "
5617                                           "info_idx=%zu >= SWRAP_MAX_PASSED_FDS(%zu)!",
5618                                           i, fds_in[i], i, si_idx_array[i],
5619                                           info_idx,
5620                                           SWRAP_MAX_PASSED_SOCKET_INFO);
5621                                 errno = EINVAL;
5622                                 return -1;
5623                         }
5624                         payload->idxs[i] = info_idx;
5625                         info_idx += 1;
5626                         continue;
5627                 }
5628         }
5629
5630         for (i = 0; i < num_fds_in; i++) {
5631                 struct socket_info *si = si_array[i];
5632
5633                 if (si == NULL) {
5634                         SWRAP_LOG(SWRAP_LOG_TRACE,
5635                                   "fds_in[%zu]=%d not an inet socket",
5636                                   i, fds_in[i]);
5637                         continue;
5638                 }
5639
5640                 SWRAP_LOG(SWRAP_LOG_TRACE,
5641                           "fds_in[%zu]=%d si_idx_array[%zu]=%d "
5642                           "passing as info.idxs[%zu]=%d!",
5643                           i, fds_in[i],
5644                           i, si_idx_array[i],
5645                           i, payload->idxs[i]);
5646
5647                 SWRAP_LOCK_SI(si);
5648                 si->fd_passed += 1;
5649                 payload->infos[payload->idxs[i]] = *si;
5650                 payload->infos[payload->idxs[i]].fd_passed = 0;
5651                 SWRAP_UNLOCK_SI(si);
5652         }
5653
5654         rc = pipe(pipefd);
5655         if (rc == -1) {
5656                 int saved_errno = errno;
5657                 SWRAP_LOG(SWRAP_LOG_ERROR,
5658                           "pipe() failed - %d %s",
5659                           saved_errno,
5660                           strerror(saved_errno));
5661                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5662                 errno = saved_errno;
5663                 return -1;
5664         }
5665
5666         sret = libc_write(pipefd[1], &info, sizeof(info));
5667         if (sret != sizeof(info)) {
5668                 int saved_errno = errno;
5669                 if (sret != -1) {
5670                         saved_errno = EINVAL;
5671                 }
5672                 SWRAP_LOG(SWRAP_LOG_ERROR,
5673                           "write() failed - sret=%zd - %d %s",
5674                           sret, saved_errno,
5675                           strerror(saved_errno));
5676                 swrap_dec_fd_passed_array(num_fds_in, si_array);
5677                 libc_close(pipefd[1]);
5678                 libc_close(pipefd[0]);
5679                 errno = saved_errno;
5680                 return -1;
5681         }
5682         libc_close(pipefd[1]);
5683
5684         /*
5685          * Add the pipe read end to the end of the passed fd array
5686          */
5687         fds_out[num_fds_in] = pipefd[0];
5688         new_cmsg->cmsg_len = cmsg_len;
5689
5690         /* we're done ... */
5691         *scm_rights_pipe_fd = pipefd[0];
5692         *cm_data_space = new_cm_data_space;
5693
5694         return 0;
5695 }
5696
/*
 * Dispatch one SOL_SOCKET control message on the send side.
 *
 * SCM_RIGHTS messages go through swrap_sendmsg_unix_scm_rights(); every
 * other type is passed to swrap_sendmsg_copy_cmsg(). The result of the
 * selected helper is returned as is.
 */
static int swrap_sendmsg_unix_sol_socket(struct cmsghdr *cmsg,
                                         uint8_t **cm_data,
                                         size_t *cm_data_space,
                                         int *scm_rights_pipe_fd)
{
        if (cmsg->cmsg_type == SCM_RIGHTS) {
                return swrap_sendmsg_unix_scm_rights(cmsg,
                                                     cm_data,
                                                     cm_data_space,
                                                     scm_rights_pipe_fd);
        }

        return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
5720
5721 static int swrap_recvmsg_unix_scm_rights(struct cmsghdr *cmsg,
5722                                          uint8_t **cm_data,
5723                                          size_t *cm_data_space)
5724 {
5725         int scm_rights_pipe_fd = -1;
5726         struct swrap_unix_scm_rights info;
5727         struct swrap_unix_scm_rights_payload *payload = NULL;
5728         int si_idx_array[SWRAP_MAX_PASSED_FDS];
5729         size_t size_fds_in;
5730         size_t num_fds_in;
5731         union __swrap_fds __fds_in = { .p = NULL, };
5732         const int *fds_in = NULL;
5733         size_t num_fds_out;
5734         size_t size_fds_out;
5735         union __swrap_fds __fds_out = { .p = NULL, };
5736         int *fds_out = NULL;
5737         size_t cmsg_len;
5738         size_t cmsg_space;
5739         size_t new_cm_data_space;
5740         union __swrap_cmsghdr __new_cmsg = { .p = NULL, };
5741         struct cmsghdr *new_cmsg = NULL;
5742         uint8_t *p = NULL;
5743         ssize_t sret;
5744         size_t i;
5745         int cmp;
5746
5747         if (cmsg->cmsg_len < CMSG_LEN(0)) {
5748                 SWRAP_LOG(SWRAP_LOG_ERROR,
5749                           "cmsg->cmsg_len=%zu < CMSG_LEN(0)=%zu",
5750                           (size_t)cmsg->cmsg_len,
5751                           CMSG_LEN(0));
5752                 errno = EINVAL;
5753                 return -1;
5754         }
5755         size_fds_in = cmsg->cmsg_len - CMSG_LEN(0);
5756         if ((size_fds_in % sizeof(int)) != 0) {
5757                 SWRAP_LOG(SWRAP_LOG_ERROR,
5758                           "cmsg->cmsg_len=%zu => (size_fds_in=%zu %% sizeof(int)=%zu) != 0",
5759                           (size_t)cmsg->cmsg_len,
5760                           size_fds_in,
5761                           sizeof(int));
5762                 errno = EINVAL;
5763                 return -1;
5764         }
5765         num_fds_in = size_fds_in / sizeof(int);
5766         if (num_fds_in > (SWRAP_MAX_PASSED_FDS + 1)) {
5767                 SWRAP_LOG(SWRAP_LOG_ERROR,
5768                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5769                           "num_fds_in=%zu > SWRAP_MAX_PASSED_FDS+1(%zu)",
5770                           (size_t)cmsg->cmsg_len,
5771                           size_fds_in,
5772                           num_fds_in,
5773                           SWRAP_MAX_PASSED_FDS+1);
5774                 errno = EINVAL;
5775                 return -1;
5776         }
5777         if (num_fds_in <= 1) {
5778                 SWRAP_LOG(SWRAP_LOG_ERROR,
5779                           "cmsg->cmsg_len=%zu,size_fds_in=%zu => "
5780                           "num_fds_in=%zu",
5781                           (size_t)cmsg->cmsg_len,
5782                           size_fds_in,
5783                           num_fds_in);
5784                 errno = EINVAL;
5785                 return -1;
5786         }
5787         __fds_in.p = CMSG_DATA(cmsg);
5788         fds_in = __fds_in.fds;
5789         num_fds_out = num_fds_in - 1;
5790
5791         SWRAP_LOG(SWRAP_LOG_TRACE,
5792                   "num_fds_in=%zu num_fds_out=%zu",
5793                   num_fds_in, num_fds_out);
5794
5795         for (i = 0; i < num_fds_in; i++) {
5796                 /* Check if we have a stale fd and remove it */
5797                 swrap_remove_stale(fds_in[i]);
5798         }
5799
5800         scm_rights_pipe_fd = fds_in[num_fds_out];
5801         size_fds_out = sizeof(int) * num_fds_out;
5802         cmsg_len = CMSG_LEN(size_fds_out);
5803         cmsg_space = CMSG_SPACE(size_fds_out);
5804
5805         new_cm_data_space = *cm_data_space + cmsg_space;
5806
5807         p = realloc((*cm_data), new_cm_data_space);
5808         if (p == NULL) {
5809                 swrap_close_fd_array(num_fds_in, fds_in);
5810                 return -1;
5811         }
5812         (*cm_data) = p;
5813         p = (*cm_data) + (*cm_data_space);
5814         memset(p, 0, cmsg_space);
5815         __new_cmsg.p = p;
5816         new_cmsg = __new_cmsg.cmsg;
5817         *new_cmsg = *cmsg;
5818         __fds_out.p = CMSG_DATA(new_cmsg);
5819         fds_out = __fds_out.fds;
5820         memcpy(fds_out, fds_in, size_fds_out);
5821         new_cmsg->cmsg_len = cmsg_len;
5822
5823         sret = read(scm_rights_pipe_fd, &info, sizeof(info));
5824         if (sret != sizeof(info)) {
5825                 int saved_errno = errno;
5826                 if (sret != -1) {
5827                         saved_errno = EINVAL;
5828                 }
5829                 SWRAP_LOG(SWRAP_LOG_ERROR,
5830                           "read() failed - sret=%zd - %d %s",
5831                           sret, saved_errno,
5832                           strerror(saved_errno));
5833                 swrap_close_fd_array(num_fds_in, fds_in);
5834                 errno = saved_errno;
5835                 return -1;
5836         }
5837         libc_close(scm_rights_pipe_fd);
5838         payload = &info.payload;
5839
5840         if (info.magic != swrap_unix_scm_right_magic) {
5841                 SWRAP_LOG(SWRAP_LOG_ERROR,
5842                           "info.magic=0x%llx != swrap_unix_scm_right_magic=0x%llx",
5843                           (unsigned long long)info.magic,
5844                           (unsigned long long)swrap_unix_scm_right_magic);
5845                 swrap_close_fd_array(num_fds_out, fds_out);
5846                 errno = EINVAL;
5847                 return -1;
5848         }
5849
5850         cmp = memcmp(info.package_name,
5851                      SOCKET_WRAPPER_PACKAGE,
5852                      sizeof(info.package_name));
5853         if (cmp != 0) {
5854                 SWRAP_LOG(SWRAP_LOG_ERROR,
5855                           "info.package_name='%.*s' != '%s'",
5856                           (int)sizeof(info.package_name),
5857                           info.package_name,
5858                           SOCKET_WRAPPER_PACKAGE);
5859                 swrap_close_fd_array(num_fds_out, fds_out);
5860                 errno = EINVAL;
5861                 return -1;
5862         }
5863
5864         cmp = memcmp(info.package_version,
5865                      SOCKET_WRAPPER_VERSION,
5866                      sizeof(info.package_version));
5867         if (cmp != 0) {
5868                 SWRAP_LOG(SWRAP_LOG_ERROR,
5869                           "info.package_version='%.*s' != '%s'",
5870                           (int)sizeof(info.package_version),
5871                           info.package_version,
5872                           SOCKET_WRAPPER_VERSION);
5873                 swrap_close_fd_array(num_fds_out, fds_out);
5874                 errno = EINVAL;
5875                 return -1;
5876         }
5877
5878         if (info.full_size != sizeof(info)) {
5879                 SWRAP_LOG(SWRAP_LOG_ERROR,
5880                           "info.full_size=%zu != sizeof(info)=%zu",
5881                           (size_t)info.full_size,
5882                           sizeof(info));
5883                 swrap_close_fd_array(num_fds_out, fds_out);
5884                 errno = EINVAL;
5885                 return -1;
5886         }
5887
5888         if (info.payload_size != sizeof(info.payload)) {
5889                 SWRAP_LOG(SWRAP_LOG_ERROR,
5890                           "info.payload_size=%zu != sizeof(info.payload)=%zu",
5891                           (size_t)info.payload_size,
5892                           sizeof(info.payload));
5893                 swrap_close_fd_array(num_fds_out, fds_out);
5894                 errno = EINVAL;
5895                 return -1;
5896         }
5897
5898         if (payload->num_idxs != num_fds_out) {
5899                 SWRAP_LOG(SWRAP_LOG_ERROR,
5900                           "info.num_idxs=%u != num_fds_out=%zu",
5901                           payload->num_idxs, num_fds_out);
5902                 swrap_close_fd_array(num_fds_out, fds_out);
5903                 errno = EINVAL;
5904                 return -1;
5905         }
5906
5907         for (i = 0; i < num_fds_out; i++) {
5908                 size_t j;
5909
5910                 si_idx_array[i] = -1;
5911
5912                 if (payload->idxs[i] == -1) {
5913                         SWRAP_LOG(SWRAP_LOG_TRACE,
5914                                   "fds_out[%zu]=%d not an inet socket",
5915                                   i, fds_out[i]);
5916                         continue;
5917                 }
5918
5919                 if (payload->idxs[i] < 0) {
5920                         SWRAP_LOG(SWRAP_LOG_ERROR,
5921                                   "fds_out[%zu]=%d info.idxs[%zu]=%d < 0!",
5922                                   i, fds_out[i], i, payload->idxs[i]);
5923                         swrap_close_fd_array(num_fds_out, fds_out);
5924                         errno = EINVAL;
5925                         return -1;
5926                 }
5927
5928                 if (payload->idxs[i] >= payload->num_idxs) {
5929                         SWRAP_LOG(SWRAP_LOG_ERROR,
5930                                   "fds_out[%zu]=%d info.idxs[%zu]=%d >= %u!",
5931                                   i, fds_out[i], i, payload->idxs[i],
5932                                   payload->num_idxs);
5933                         swrap_close_fd_array(num_fds_out, fds_out);
5934                         errno = EINVAL;
5935                         return -1;
5936                 }
5937
5938                 if ((size_t)fds_out[i] >= socket_fds_max) {
5939                         SWRAP_LOG(SWRAP_LOG_ERROR,
5940                                   "The max socket index limit of %zu has been reached, "
5941                                   "trying to add %d",
5942                                   socket_fds_max,
5943                                   fds_out[i]);
5944                         swrap_close_fd_array(num_fds_out, fds_out);
5945                         errno = EMFILE;
5946                         return -1;
5947                 }
5948
5949                 SWRAP_LOG(SWRAP_LOG_TRACE,
5950                           "fds_in[%zu]=%d "
5951                           "received as info.idxs[%zu]=%d!",
5952                           i, fds_out[i],
5953                           i, payload->idxs[i]);
5954
5955                 for (j = 0; j < i; j++) {
5956                         if (payload->idxs[j] == -1) {
5957                                 continue;
5958                         }
5959                         if (payload->idxs[j] == payload->idxs[i]) {
5960                                 si_idx_array[i] = si_idx_array[j];
5961                         }
5962                 }
5963                 if (si_idx_array[i] == -1) {
5964                         const struct socket_info *si = &payload->infos[payload->idxs[i]];
5965
5966                         si_idx_array[i] = swrap_add_socket_info(si);
5967                         if (si_idx_array[i] == -1) {
5968                                 int saved_errno = errno;
5969                                 SWRAP_LOG(SWRAP_LOG_ERROR,
5970                                           "The max socket index limit of %zu has been reached, "
5971                                           "trying to add %d",
5972                                           socket_fds_max,
5973                                           fds_out[i]);
5974                                 swrap_undo_si_idx_array(i, si_idx_array);
5975                                 swrap_close_fd_array(num_fds_out, fds_out);
5976                                 errno = saved_errno;
5977                                 return -1;
5978                         }
5979                         SWRAP_LOG(SWRAP_LOG_TRACE,
5980                                   "Imported %s socket for protocol %s, fd=%d",
5981                                   si->family == AF_INET ? "IPv4" : "IPv6",
5982                                   si->type == SOCK_DGRAM ? "UDP" : "TCP",
5983                                   fds_out[i]);
5984                 }
5985         }
5986
5987         for (i = 0; i < num_fds_out; i++) {
5988                 if (si_idx_array[i] == -1) {
5989                         continue;
5990                 }
5991                 set_socket_info_index(fds_out[i], si_idx_array[i]);
5992         }
5993
5994         /* we're done ... */
5995         *cm_data_space = new_cm_data_space;
5996
5997         return 0;
5998 }
5999
/*
 * Dispatch one SOL_SOCKET control message on the receive side.
 *
 * SCM_RIGHTS messages go through swrap_recvmsg_unix_scm_rights(); every
 * other type is passed to swrap_sendmsg_copy_cmsg(). The result of the
 * selected helper is returned as is.
 */
static int swrap_recvmsg_unix_sol_socket(struct cmsghdr *cmsg,
                                         uint8_t **cm_data,
                                         size_t *cm_data_space)
{
        if (cmsg->cmsg_type == SCM_RIGHTS) {
                return swrap_recvmsg_unix_scm_rights(cmsg,
                                                     cm_data,
                                                     cm_data_space);
        }

        return swrap_sendmsg_copy_cmsg(cmsg, cm_data, cm_data_space);
}
6021
6022 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
6023
/*
 * Prepare a sendmsg() on a unix socket.
 *
 * *msg_tmp becomes a copy of *_msg_in. If the message carries control
 * data, every control message is rebuilt into a freshly allocated buffer
 * (SOL_SOCKET ones via swrap_sendmsg_unix_sol_socket(), all others via
 * swrap_sendmsg_copy_cmsg()) and msg_tmp is pointed at that buffer.
 *
 * On success msg_tmp->msg_control is either NULL or memory allocated
 * here, with one exception: a message with a NULL msg_control and a
 * non-zero msg_controllen is passed through unchanged. This matters
 * because swrap_sendmsg_after_unix() releases msg_tmp->msg_control
 * unconditionally.
 *
 * _msg_in             the caller's message, not modified
 * msg_tmp             receives the message to hand to the kernel
 * scm_rights_pipe_fd  set to -1, or to the pipe read end created while
 *                     handling an SCM_RIGHTS header
 *
 * Returns 0 on success. On failure the negative result of the failing
 * helper is returned with its errno preserved, the temporary buffer is
 * freed and a pipe fd that was already created is closed again.
 */
static int swrap_sendmsg_before_unix(const struct msghdr *_msg_in,
                                     struct msghdr *msg_tmp,
                                     int *scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        struct msghdr *msg_in = discard_const_p(struct msghdr, _msg_in);
        struct cmsghdr *cmsg = NULL;
        uint8_t *cm_data = NULL;
        size_t cm_data_space = 0;
        int rc = -1;

        *msg_tmp = *msg_in;
        *scm_rights_pipe_fd = -1;

        /* Nothing to do */
        if (msg_in->msg_control == NULL) {
                return 0;
        }
        if (msg_in->msg_controllen == 0) {
                /*
                 * No control data, but the caller left a buffer pointer
                 * in place. Do not carry that pointer into msg_tmp:
                 * swrap_sendmsg_after_unix() frees msg_tmp->msg_control
                 * and must never see memory owned by the caller.
                 */
                msg_tmp->msg_control = NULL;
                return 0;
        }

        for (cmsg = CMSG_FIRSTHDR(msg_in);
             cmsg != NULL;
             cmsg = CMSG_NXTHDR(msg_in, cmsg)) {
                switch (cmsg->cmsg_level) {
                case SOL_SOCKET:
                        rc = swrap_sendmsg_unix_sol_socket(cmsg,
                                                           &cm_data,
                                                           &cm_data_space,
                                                           scm_rights_pipe_fd);
                        break;

                default:
                        rc = swrap_sendmsg_copy_cmsg(cmsg,
                                                     &cm_data,
                                                     &cm_data_space);
                        break;
                }
                if (rc < 0) {
                        int saved_errno = errno;
                        SAFE_FREE(cm_data);
                        /*
                         * An earlier SCM_RIGHTS header may already have
                         * created the pipe; close it here so the fd is
                         * not leaked when the message is never sent.
                         */
                        if (*scm_rights_pipe_fd != -1) {
                                libc_close(*scm_rights_pipe_fd);
                                *scm_rights_pipe_fd = -1;
                        }
                        errno = saved_errno;
                        return rc;
                }
        }

        msg_tmp->msg_controllen = cm_data_space;
        msg_tmp->msg_control = cm_data;

        return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        *msg_tmp = *_msg_in;
        return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6077
/*
 * Clean up after a sendmsg() on a unix socket.
 *
 * Releases msg_tmp->msg_control via SAFE_FREE() and closes the pipe read
 * end if one was created (scm_rights_pipe_fd != -1). errno is preserved
 * across the cleanup and ret is returned unchanged, so this can wrap the
 * result of the actual send call.
 */
static ssize_t swrap_sendmsg_after_unix(struct msghdr *msg_tmp,
                                        ssize_t ret,
                                        int scm_rights_pipe_fd)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        const int errno_on_entry = errno;

        SAFE_FREE(msg_tmp->msg_control);

        if (scm_rights_pipe_fd != -1) {
                libc_close(scm_rights_pipe_fd);
        }

        /* The cleanup calls above must not disturb the send's errno. */
        errno = errno_on_entry;
#endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        return ret;
}
6092
/*
 * Prepare a recvmsg() on a unix socket.
 *
 * *msg_tmp becomes a copy of *msg_in. If the caller supplied a control
 * buffer, msg_tmp is pointed at a zeroed temporary buffer instead, which
 * is CMSG_SPACE(sizeof(int)) bytes larger than the caller's so that the
 * extra pipe fd added on the sending side fits as well. The temporary
 * buffer is also returned through *tmp_control; it stays NULL when no
 * buffer was needed.
 *
 * Returns 0 on success and -1 if the buffer could not be allocated.
 */
static int swrap_recvmsg_before_unix(struct msghdr *msg_in,
                                     struct msghdr *msg_tmp,
                                     uint8_t **tmp_control)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        const size_t pipe_fd_space = CMSG_SPACE(sizeof(int));
        size_t bufsize = 0;
        uint8_t *buf = NULL;

        *tmp_control = NULL;
        *msg_tmp = *msg_in;

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_in->msg_controllen=%zu",
                  (size_t)msg_in->msg_controllen);

        if (msg_in->msg_control == NULL || msg_in->msg_controllen == 0) {
                /* Nothing to do */
                return 0;
        }

        /*
         * The kernel needs a bit more room than the caller offered in
         * order to deliver the pipe fd added by
         * swrap_sendmsg_before_unix(); swrap_recvmsg_after_unix() hides
         * it again. The size is only enlarged while it stays below
         * INT32_MAX.
         */
        bufsize = msg_in->msg_controllen;
        if (bufsize < (INT32_MAX - pipe_fd_space)) {
                bufsize += pipe_fd_space;
        }

        buf = calloc(1, bufsize);
        if (buf == NULL) {
                return -1;
        }

        *tmp_control = buf;
        msg_tmp->msg_control = buf;
        msg_tmp->msg_controllen = bufsize;

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_tmp->msg_controllen=%zu",
                  (size_t)msg_tmp->msg_controllen);
        return 0;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        *msg_tmp = *msg_in;
        *tmp_control = NULL;
        return 0;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6142
/*
 * Finish a recvmsg() on a unix socket.
 *
 * msg_tmp      the message the kernel filled in; its control buffer is
 *              the temporary one from swrap_recvmsg_before_unix() when
 *              *tmp_control is not NULL
 * tmp_control  the temporary control buffer; always released here
 * msg_out      the caller's message; left untouched if ret < 0 or if
 *              translating the control data fails
 * ret          result of the receive call
 *
 * On success the received control messages are rebuilt (SOL_SOCKET ones
 * via swrap_recvmsg_unix_sol_socket(), all others via
 * swrap_sendmsg_copy_cmsg()) and copied into the caller's control
 * buffer, truncated to its size. msg_out->msg_control always keeps
 * pointing at the caller's own buffer.
 *
 * Returns ret, or the negative result of a failing helper. errno is
 * preserved on the paths that return early.
 */
static ssize_t swrap_recvmsg_after_unix(struct msghdr *msg_tmp,
                                        uint8_t **tmp_control,
                                        struct msghdr *msg_out,
                                        ssize_t ret)
{
#ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
        struct cmsghdr *cmsg = NULL;
        uint8_t *cm_data = NULL;
        size_t cm_data_space = 0;
        int rc = -1;

        if (ret < 0) {
                int saved_errno = errno;
                SWRAP_LOG(SWRAP_LOG_TRACE, "ret=%zd - %d - %s", ret,
                          saved_errno, strerror(saved_errno));
                SAFE_FREE(*tmp_control);
                /* msg_out should not be touched on error */
                errno = saved_errno;
                return ret;
        }

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_tmp->msg_controllen=%zu",
                  (size_t)msg_tmp->msg_controllen);

        /* Nothing to do */
        if (msg_tmp->msg_controllen == 0 || msg_tmp->msg_control == NULL) {
                int saved_errno = errno;
                void *caller_control = msg_out->msg_control;

                *msg_out = *msg_tmp;
                if (*tmp_control != NULL) {
                        /*
                         * msg_tmp->msg_control is the temporary buffer
                         * that is freed right below. Give the caller its
                         * own pointer back instead of a dangling one.
                         */
                        msg_out->msg_control = caller_control;
                }
                SAFE_FREE(*tmp_control);
                errno = saved_errno;
                return ret;
        }

        for (cmsg = CMSG_FIRSTHDR(msg_tmp);
             cmsg != NULL;
             cmsg = CMSG_NXTHDR(msg_tmp, cmsg)) {
                switch (cmsg->cmsg_level) {
                case SOL_SOCKET:
                        rc = swrap_recvmsg_unix_sol_socket(cmsg,
                                                           &cm_data,
                                                           &cm_data_space);
                        break;

                default:
                        rc = swrap_sendmsg_copy_cmsg(cmsg,
                                                     &cm_data,
                                                     &cm_data_space);
                        break;
                }
                if (rc < 0) {
                        int saved_errno = errno;
                        SAFE_FREE(cm_data);
                        SAFE_FREE(*tmp_control);
                        errno = saved_errno;
                        return rc;
                }
        }

        /*
         * msg_tmp->msg_control (*tmp_control) was created by
         * swrap_recvmsg_before_unix() and msg_out->msg_control
         * is still the buffer of the caller.
         */
        msg_tmp->msg_control = msg_out->msg_control;
        msg_tmp->msg_controllen = msg_out->msg_controllen;
        *msg_out = *msg_tmp;

        /* Never copy more than the caller's buffer can hold. */
        cm_data_space = MIN(cm_data_space, msg_out->msg_controllen);
        if (cm_data_space > 0) {
                /* cm_data is NULL when no control message was rebuilt. */
                memcpy(msg_out->msg_control, cm_data, cm_data_space);
        }
        msg_out->msg_controllen = cm_data_space;
        SAFE_FREE(cm_data);
        SAFE_FREE(*tmp_control);

        SWRAP_LOG(SWRAP_LOG_TRACE,
                  "msg_out->msg_controllen=%zu",
                  (size_t)msg_out->msg_controllen);
        return ret;
#else /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
        int saved_errno = errno;
        *msg_out = *msg_tmp;
        SAFE_FREE(*tmp_control);
        errno = saved_errno;
        return ret;
#endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
}
6229
6230 static ssize_t swrap_sendmsg_before(int fd,
6231                                     struct socket_info *si,
6232                                     struct msghdr *msg,
6233                                     struct iovec *tmp_iov,
6234                                     struct sockaddr_un *tmp_un,
6235                                     const struct sockaddr_un **to_un,
6236                                     const struct sockaddr **to,
6237                                     int *bcast)
6238 {
6239         size_t i, len = 0;
6240         ssize_t ret = -1;
6241         struct swrap_sockaddr_buf buf = {};
6242
6243         if (to_un) {
6244                 *to_un = NULL;
6245         }
6246         if (to) {
6247                 *to = NULL;
6248         }
6249         if (bcast) {
6250                 *bcast = 0;
6251         }
6252
6253         SWRAP_LOCK_SI(si);
6254
6255         switch (si->type) {
6256         case SOCK_STREAM: {
6257                 unsigned long mtu;
6258
6259                 if (!si->connected) {
6260                         errno = ENOTCONN;
6261                         goto out;
6262                 }
6263
6264                 if (msg->msg_iovlen == 0) {
6265                         break;
6266                 }
6267
6268                 mtu = socket_wrapper_mtu();
6269                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6270                         size_t nlen;
6271                         nlen = len + msg->msg_iov[i].iov_len;
6272                         if (nlen < len) {
6273                                 /* overflow */
6274                                 errno = EMSGSIZE;
6275                                 goto out;
6276                         }
6277                         if (nlen > mtu) {
6278                                 break;
6279                         }
6280                 }
6281                 msg->msg_iovlen = i;
6282                 if (msg->msg_iovlen == 0) {
6283                         *tmp_iov = msg->msg_iov[0];
6284                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6285                                                (size_t)mtu);
6286                         msg->msg_iov = tmp_iov;
6287                         msg->msg_iovlen = 1;
6288                 }
6289                 break;
6290         }
6291         case SOCK_DGRAM:
6292                 if (si->connected) {
6293                         if (msg->msg_name != NULL) {
6294                                 /*
6295                                  * We are dealing with unix sockets and if we
6296                                  * are connected, we should only talk to the
6297                                  * connected unix path. Using the fd to send
6298                                  * to another server would be hard to achieve.
6299                                  */
6300                                 msg->msg_name = NULL;
6301                                 msg->msg_namelen = 0;
6302                         }
6303                         SWRAP_LOG(SWRAP_LOG_TRACE,
6304                                   "connected(%s) fd=%d",
6305                                   swrap_sockaddr_string(&buf, &si->peername.sa.s),
6306                                   fd);
6307                 } else {
6308                         const struct sockaddr *msg_name;
6309                         msg_name = (const struct sockaddr *)msg->msg_name;
6310
6311                         if (msg_name == NULL) {
6312                                 errno = ENOTCONN;
6313                                 goto out;
6314                         }
6315
6316
6317                         ret = sockaddr_convert_to_un(si, msg_name, msg->msg_namelen,
6318                                                      tmp_un, 0, bcast);
6319                         if (ret == -1) {
6320                                 goto out;
6321                         }
6322
6323                         if (to_un) {
6324                                 *to_un = tmp_un;
6325                         }
6326                         if (to) {
6327                                 *to = msg_name;
6328                         }
6329                         msg->msg_name = tmp_un;
6330                         msg->msg_namelen = sizeof(*tmp_un);
6331                 }
6332
6333                 if (si->bound == 0) {
6334                         ret = swrap_auto_bind(fd, si, si->family);
6335                         if (ret == -1) {
6336                                 SWRAP_UNLOCK_SI(si);
6337                                 if (errno == ENOTSOCK) {
6338                                         swrap_remove_stale(fd);
6339                                         ret = -ENOTSOCK;
6340                                 } else {
6341                                         SWRAP_LOG(SWRAP_LOG_ERROR, "swrap_sendmsg_before failed");
6342                                 }
6343                                 return ret;
6344                         }
6345                 }
6346
6347                 if (!si->defer_connect) {
6348                         break;
6349                 }
6350
6351                 ret = sockaddr_convert_to_un(si,
6352                                              &si->peername.sa.s,
6353                                              si->peername.sa_socklen,
6354                                              tmp_un,
6355                                              0,
6356                                              NULL);
6357                 if (ret == -1) {
6358                         goto out;
6359                 }
6360
6361                 SWRAP_LOG(SWRAP_LOG_TRACE,
6362                           "deferred connect(%s) path=%s, fd=%d",
6363                           swrap_sockaddr_string(&buf, &si->peername.sa.s),
6364                           tmp_un->sun_path, fd);
6365
6366                 ret = libc_connect(fd,
6367                                    (struct sockaddr *)(void *)tmp_un,
6368                                    sizeof(*tmp_un));
6369
6370                 /* to give better errors */
6371                 if (ret == -1 && errno == ENOENT) {
6372                         errno = EHOSTUNREACH;
6373                 }
6374
6375                 if (ret == -1) {
6376                         goto out;
6377                 }
6378
6379                 si->defer_connect = 0;
6380                 break;
6381         default:
6382                 errno = EHOSTUNREACH;
6383                 goto out;
6384         }
6385
6386         ret = 0;
6387 out:
6388         SWRAP_UNLOCK_SI(si);
6389
6390         return ret;
6391 }
6392
6393 static void swrap_sendmsg_after(int fd,
6394                                 struct socket_info *si,
6395                                 struct msghdr *msg,
6396                                 const struct sockaddr *to,
6397                                 ssize_t ret)
6398 {
6399         int saved_errno = errno;
6400         size_t i, len = 0;
6401         uint8_t *buf;
6402         off_t ofs = 0;
6403         size_t avail = 0;
6404         size_t remain;
6405
6406         /* to give better errors */
6407         if (ret == -1) {
6408                 if (saved_errno == ENOENT) {
6409                         saved_errno = EHOSTUNREACH;
6410                 } else if (saved_errno == ENOTSOCK) {
6411                         /* If the fd is not a socket, remove it */
6412                         swrap_remove_stale(fd);
6413                 }
6414         }
6415
6416         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6417                 avail += msg->msg_iov[i].iov_len;
6418         }
6419
6420         if (ret == -1) {
6421                 remain = MIN(80, avail);
6422         } else {
6423                 remain = ret;
6424         }
6425
6426         /* we capture it as one single packet */
6427         buf = (uint8_t *)malloc(remain);
6428         if (!buf) {
6429                 /* we just not capture the packet */
6430                 errno = saved_errno;
6431                 return;
6432         }
6433
6434         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6435                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6436                 if (this_time > 0) {
6437                         memcpy(buf + ofs,
6438                                msg->msg_iov[i].iov_base,
6439                                this_time);
6440                 }
6441                 ofs += this_time;
6442                 remain -= this_time;
6443         }
6444         len = ofs;
6445
6446         SWRAP_LOCK_SI(si);
6447
6448         switch (si->type) {
6449         case SOCK_STREAM:
6450                 if (ret == -1) {
6451                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6452                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND_RST, NULL, 0);
6453                 } else {
6454                         swrap_pcap_dump_packet(si, NULL, SWRAP_SEND, buf, len);
6455                 }
6456                 break;
6457
6458         case SOCK_DGRAM:
6459                 if (si->connected) {
6460                         to = &si->peername.sa.s;
6461                 }
6462                 if (ret == -1) {
6463                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6464                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO_UNREACH, buf, len);
6465                 } else {
6466                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6467                 }
6468                 break;
6469         }
6470
6471         SWRAP_UNLOCK_SI(si);
6472
6473         free(buf);
6474         errno = saved_errno;
6475 }
6476
6477 static int swrap_recvmsg_before(int fd,
6478                                 struct socket_info *si,
6479                                 struct msghdr *msg,
6480                                 struct iovec *tmp_iov)
6481 {
6482         size_t i, len = 0;
6483         int ret = -1;
6484
6485         SWRAP_LOCK_SI(si);
6486
6487         (void)fd; /* unused */
6488
6489         switch (si->type) {
6490         case SOCK_STREAM: {
6491                 unsigned int mtu;
6492                 if (!si->connected) {
6493                         errno = ENOTCONN;
6494                         goto out;
6495                 }
6496
6497                 if (msg->msg_iovlen == 0) {
6498                         break;
6499                 }
6500
6501                 mtu = socket_wrapper_mtu();
6502                 for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6503                         size_t nlen;
6504                         nlen = len + msg->msg_iov[i].iov_len;
6505                         if (nlen > mtu) {
6506                                 break;
6507                         }
6508                 }
6509                 msg->msg_iovlen = i;
6510                 if (msg->msg_iovlen == 0) {
6511                         *tmp_iov = msg->msg_iov[0];
6512                         tmp_iov->iov_len = MIN((size_t)tmp_iov->iov_len,
6513                                                (size_t)mtu);
6514                         msg->msg_iov = tmp_iov;
6515                         msg->msg_iovlen = 1;
6516                 }
6517                 break;
6518         }
6519         case SOCK_DGRAM:
6520                 if (msg->msg_name == NULL) {
6521                         errno = EINVAL;
6522                         goto out;
6523                 }
6524
6525                 if (msg->msg_iovlen == 0) {
6526                         break;
6527                 }
6528
6529                 if (si->bound == 0) {
6530                         ret = swrap_auto_bind(fd, si, si->family);
6531                         if (ret == -1) {
6532                                 SWRAP_UNLOCK_SI(si);
6533                                 /*
6534                                  * When attempting to read or write to a
6535                                  * descriptor, if an underlying autobind fails
6536                                  * because it's not a socket, stop intercepting
6537                                  * uses of that descriptor.
6538                                  */
6539                                 if (errno == ENOTSOCK) {
6540                                         swrap_remove_stale(fd);
6541                                         ret = -ENOTSOCK;
6542                                 } else {
6543                                         SWRAP_LOG(SWRAP_LOG_ERROR,
6544                                                   "swrap_recvmsg_before failed");
6545                                 }
6546                                 return ret;
6547                         }
6548                 }
6549                 break;
6550         default:
6551                 errno = EHOSTUNREACH;
6552                 goto out;
6553         }
6554
6555         ret = 0;
6556 out:
6557         SWRAP_UNLOCK_SI(si);
6558
6559         return ret;
6560 }
6561
6562 static int swrap_recvmsg_after(int fd,
6563                                struct socket_info *si,
6564                                struct msghdr *msg,
6565                                const struct sockaddr_un *un_addr,
6566                                socklen_t un_addrlen,
6567                                ssize_t ret)
6568 {
6569         int saved_errno = errno;
6570         size_t i;
6571         uint8_t *buf = NULL;
6572         off_t ofs = 0;
6573         size_t avail = 0;
6574         size_t remain;
6575         int rc;
6576
6577         /* to give better errors */
6578         if (ret == -1) {
6579                 if (saved_errno == ENOENT) {
6580                         saved_errno = EHOSTUNREACH;
6581                 } else if (saved_errno == ENOTSOCK) {
6582                         /* If the fd is not a socket, remove it */
6583                         swrap_remove_stale(fd);
6584                 }
6585         }
6586
6587         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6588                 avail += msg->msg_iov[i].iov_len;
6589         }
6590
6591         SWRAP_LOCK_SI(si);
6592
6593         /* Convert the socket address before we leave */
6594         if (si->type == SOCK_DGRAM && un_addr != NULL) {
6595                 rc = sockaddr_convert_from_un(si,
6596                                               un_addr,
6597                                               un_addrlen,
6598                                               si->family,
6599                                               msg->msg_name,
6600                                               &msg->msg_namelen);
6601                 if (rc == -1) {
6602                         goto done;
6603                 }
6604         }
6605
6606         if (avail == 0) {
6607                 rc = 0;
6608                 goto done;
6609         }
6610
6611         if (ret == -1) {
6612                 remain = MIN(80, avail);
6613         } else {
6614                 remain = ret;
6615         }
6616
6617         /* we capture it as one single packet */
6618         buf = (uint8_t *)malloc(remain);
6619         if (buf == NULL) {
6620                 /* we just not capture the packet */
6621                 SWRAP_UNLOCK_SI(si);
6622                 errno = saved_errno;
6623                 return -1;
6624         }
6625
6626         for (i = 0; i < (size_t)msg->msg_iovlen; i++) {
6627                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[i].iov_len);
6628                 memcpy(buf + ofs,
6629                        msg->msg_iov[i].iov_base,
6630                        this_time);
6631                 ofs += this_time;
6632                 remain -= this_time;
6633         }
6634
6635         switch (si->type) {
6636         case SOCK_STREAM:
6637                 if (ret == -1 && saved_errno != EAGAIN && saved_errno != ENOBUFS) {
6638                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6639                 } else if (ret == 0) { /* END OF FILE */
6640                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV_RST, NULL, 0);
6641                 } else if (ret > 0) {
6642                         swrap_pcap_dump_packet(si, NULL, SWRAP_RECV, buf, ret);
6643                 }
6644                 break;
6645
6646         case SOCK_DGRAM:
6647                 if (ret == -1) {
6648                         break;
6649                 }
6650
6651                 if (un_addr != NULL) {
6652                         swrap_pcap_dump_packet(si,
6653                                           msg->msg_name,
6654                                           SWRAP_RECVFROM,
6655                                           buf,
6656                                           ret);
6657                 } else {
6658                         swrap_pcap_dump_packet(si,
6659                                           msg->msg_name,
6660                                           SWRAP_RECV,
6661                                           buf,
6662                                           ret);
6663                 }
6664
6665                 break;
6666         }
6667
6668         rc = 0;
6669 done:
6670         free(buf);
6671         errno = saved_errno;
6672
6673 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6674         if (rc == 0 &&
6675             msg->msg_controllen > 0 &&
6676             msg->msg_control != NULL) {
6677                 rc = swrap_msghdr_add_socket_info(si, msg);
6678                 if (rc < 0) {
6679                         SWRAP_UNLOCK_SI(si);
6680                         return -1;
6681                 }
6682         }
6683 #endif
6684
6685         SWRAP_UNLOCK_SI(si);
6686         return rc;
6687 }
6688
6689 /****************************************************************************
6690  *   RECVFROM
6691  ***************************************************************************/
6692
6693 static ssize_t swrap_recvfrom(int s, void *buf, size_t len, int flags,
6694                               struct sockaddr *from, socklen_t *fromlen)
6695 {
6696         struct swrap_address from_addr = {
6697                 .sa_socklen = sizeof(struct sockaddr_un),
6698         };
6699         ssize_t ret;
6700         struct socket_info *si = find_socket_info(s);
6701         struct swrap_address saddr = {
6702                 .sa_socklen = sizeof(struct sockaddr_storage),
6703         };
6704         struct msghdr msg;
6705         struct iovec tmp;
6706         int tret;
6707
6708         if (!si) {
6709                 return libc_recvfrom(s,
6710                                      buf,
6711                                      len,
6712                                      flags,
6713                                      from,
6714                                      fromlen);
6715         }
6716
6717         tmp.iov_base = buf;
6718         tmp.iov_len = len;
6719
6720         ZERO_STRUCT(msg);
6721         if (from != NULL && fromlen != NULL) {
6722                 msg.msg_name = from;   /* optional address */
6723                 msg.msg_namelen = *fromlen; /* size of address */
6724         } else {
6725                 msg.msg_name = &saddr.sa.s; /* optional address */
6726                 msg.msg_namelen = saddr.sa_socklen; /* size of address */
6727         }
6728         msg.msg_iov = &tmp;            /* scatter/gather array */
6729         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6730 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6731         msg.msg_control = NULL;        /* ancillary data, see below */
6732         msg.msg_controllen = 0;        /* ancillary data buffer len */
6733         msg.msg_flags = 0;             /* flags on received message */
6734 #endif
6735
6736         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6737         if (tret < 0) {
6738                 return -1;
6739         }
6740
6741         buf = msg.msg_iov[0].iov_base;
6742         len = msg.msg_iov[0].iov_len;
6743
6744         ret = libc_recvfrom(s,
6745                             buf,
6746                             len,
6747                             flags,
6748                             &from_addr.sa.s,
6749                             &from_addr.sa_socklen);
6750         if (ret == -1) {
6751                 return ret;
6752         }
6753
6754         tret = swrap_recvmsg_after(s,
6755                                    si,
6756                                    &msg,
6757                                    &from_addr.sa.un,
6758                                    from_addr.sa_socklen,
6759                                    ret);
6760         if (tret != 0) {
6761                 return tret;
6762         }
6763
6764         if (from != NULL && fromlen != NULL) {
6765                 *fromlen = msg.msg_namelen;
6766         }
6767
6768         return ret;
6769 }
6770
/*
 * Exported recvfrom() symbol: forwards to swrap_recvfrom(). The length
 * argument is declared as Psocklen_t when HAVE_ACCEPT_PSOCKLEN_T is set.
 */
#ifdef HAVE_ACCEPT_PSOCKLEN_T
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
		 struct sockaddr *from, Psocklen_t fromlen)
#else
ssize_t recvfrom(int s, void *buf, size_t len, int flags,
		 struct sockaddr *from, socklen_t *fromlen)
#endif
{
	socklen_t *addrlen = (socklen_t *)fromlen;

	return swrap_recvfrom(s, buf, len, flags, from, addrlen);
}
6781
6782 /****************************************************************************
6783  *   SENDTO
6784  ***************************************************************************/
6785
6786 static ssize_t swrap_sendto(int s, const void *buf, size_t len, int flags,
6787                             const struct sockaddr *to, socklen_t tolen)
6788 {
6789         struct msghdr msg;
6790         struct iovec tmp;
6791         struct swrap_address un_addr = {
6792                 .sa_socklen = sizeof(struct sockaddr_un),
6793         };
6794         const struct sockaddr_un *to_un = NULL;
6795         ssize_t ret;
6796         int rc;
6797         struct socket_info *si = find_socket_info(s);
6798         int bcast = 0;
6799
6800         if (!si) {
6801                 return libc_sendto(s, buf, len, flags, to, tolen);
6802         }
6803
6804         tmp.iov_base = discard_const_p(char, buf);
6805         tmp.iov_len = len;
6806
6807         ZERO_STRUCT(msg);
6808         msg.msg_name = discard_const_p(struct sockaddr, to); /* optional address */
6809         msg.msg_namelen = tolen;       /* size of address */
6810         msg.msg_iov = &tmp;            /* scatter/gather array */
6811         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6812 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6813         msg.msg_control = NULL;        /* ancillary data, see below */
6814         msg.msg_controllen = 0;        /* ancillary data buffer len */
6815         msg.msg_flags = 0;             /* flags on received message */
6816 #endif
6817
6818         rc = swrap_sendmsg_before(s,
6819                                   si,
6820                                   &msg,
6821                                   &tmp,
6822                                   &un_addr.sa.un,
6823                                   &to_un,
6824                                   &to,
6825                                   &bcast);
6826         if (rc < 0) {
6827                 return -1;
6828         }
6829
6830         buf = msg.msg_iov[0].iov_base;
6831         len = msg.msg_iov[0].iov_len;
6832
6833         if (bcast) {
6834                 struct stat st;
6835                 unsigned int iface;
6836                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
6837                 char type;
6838                 char *swrap_dir = NULL;
6839
6840                 type = SOCKET_TYPE_CHAR_UDP;
6841
6842                 swrap_dir = socket_wrapper_dir();
6843                 if (swrap_dir == NULL) {
6844                         return -1;
6845                 }
6846
6847                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
6848                         swrap_un_path(&un_addr.sa.un,
6849                                       swrap_dir,
6850                                       type,
6851                                       iface,
6852                                       prt);
6853                         if (stat(un_addr.sa.un.sun_path, &st) != 0) continue;
6854
6855                         /* ignore the any errors in broadcast sends */
6856                         libc_sendto(s,
6857                                     buf,
6858                                     len,
6859                                     flags,
6860                                     &un_addr.sa.s,
6861                                     un_addr.sa_socklen);
6862                 }
6863
6864                 SAFE_FREE(swrap_dir);
6865
6866                 SWRAP_LOCK_SI(si);
6867
6868                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
6869
6870                 SWRAP_UNLOCK_SI(si);
6871
6872                 return len;
6873         }
6874
6875         SWRAP_LOCK_SI(si);
6876         /*
6877          * If it is a dgram socket and we are connected, don't include the
6878          * 'to' address.
6879          */
6880         if (si->type == SOCK_DGRAM && si->connected) {
6881                 ret = libc_sendto(s,
6882                                   buf,
6883                                   len,
6884                                   flags,
6885                                   NULL,
6886                                   0);
6887         } else {
6888                 ret = libc_sendto(s,
6889                                   buf,
6890                                   len,
6891                                   flags,
6892                                   (struct sockaddr *)msg.msg_name,
6893                                   msg.msg_namelen);
6894         }
6895
6896         SWRAP_UNLOCK_SI(si);
6897
6898         swrap_sendmsg_after(s, si, &msg, to, ret);
6899
6900         return ret;
6901 }
6902
/* Exported sendto() symbol: forwards to swrap_sendto(). */
ssize_t sendto(int s, const void *buf, size_t len, int flags,
	       const struct sockaddr *to, socklen_t tolen)
{
	ssize_t nsent = swrap_sendto(s, buf, len, flags, to, tolen);

	return nsent;
}
6908
6909 /****************************************************************************
6910  *   RECV
6911  ***************************************************************************/
6912
6913 static ssize_t swrap_recv(int s, void *buf, size_t len, int flags)
6914 {
6915         struct socket_info *si;
6916         struct msghdr msg;
6917         struct swrap_address saddr = {
6918                 .sa_socklen = sizeof(struct sockaddr_storage),
6919         };
6920         struct iovec tmp;
6921         ssize_t ret;
6922         int tret;
6923
6924         si = find_socket_info(s);
6925         if (si == NULL) {
6926                 return libc_recv(s, buf, len, flags);
6927         }
6928
6929         tmp.iov_base = buf;
6930         tmp.iov_len = len;
6931
6932         ZERO_STRUCT(msg);
6933         msg.msg_name = &saddr.sa.s;    /* optional address */
6934         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6935         msg.msg_iov = &tmp;            /* scatter/gather array */
6936         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6937 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6938         msg.msg_control = NULL;        /* ancillary data, see below */
6939         msg.msg_controllen = 0;        /* ancillary data buffer len */
6940         msg.msg_flags = 0;             /* flags on received message */
6941 #endif
6942
6943         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
6944         if (tret < 0) {
6945                 return -1;
6946         }
6947
6948         buf = msg.msg_iov[0].iov_base;
6949         len = msg.msg_iov[0].iov_len;
6950
6951         ret = libc_recv(s, buf, len, flags);
6952
6953         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
6954         if (tret != 0) {
6955                 return tret;
6956         }
6957
6958         return ret;
6959 }
6960
/* Exported recv() symbol: forwards to swrap_recv(). */
ssize_t recv(int s, void *buf, size_t len, int flags)
{
	ssize_t nread = swrap_recv(s, buf, len, flags);

	return nread;
}
6965
6966 /****************************************************************************
6967  *   READ
6968  ***************************************************************************/
6969
6970 static ssize_t swrap_read(int s, void *buf, size_t len)
6971 {
6972         struct socket_info *si;
6973         struct msghdr msg;
6974         struct iovec tmp;
6975         struct swrap_address saddr = {
6976                 .sa_socklen = sizeof(struct sockaddr_storage),
6977         };
6978         ssize_t ret;
6979         int tret;
6980
6981         si = find_socket_info(s);
6982         if (si == NULL) {
6983                 return libc_read(s, buf, len);
6984         }
6985
6986         tmp.iov_base = buf;
6987         tmp.iov_len = len;
6988
6989         ZERO_STRUCT(msg);
6990         msg.msg_name = &saddr.sa.ss;   /* optional address */
6991         msg.msg_namelen = saddr.sa_socklen; /* size of address */
6992         msg.msg_iov = &tmp;            /* scatter/gather array */
6993         msg.msg_iovlen = 1;            /* # elements in msg_iov */
6994 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
6995         msg.msg_control = NULL;        /* ancillary data, see below */
6996         msg.msg_controllen = 0;        /* ancillary data buffer len */
6997         msg.msg_flags = 0;             /* flags on received message */
6998 #endif
6999
7000         tret = swrap_recvmsg_before(s, si, &msg, &tmp);
7001         if (tret < 0) {
7002                 if (tret == -ENOTSOCK) {
7003                         return libc_read(s, buf, len);
7004                 }
7005                 return -1;
7006         }
7007
7008         buf = msg.msg_iov[0].iov_base;
7009         len = msg.msg_iov[0].iov_len;
7010
7011         ret = libc_read(s, buf, len);
7012
7013         tret = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
7014         if (tret != 0) {
7015                 return tret;
7016         }
7017
7018         return ret;
7019 }
7020
/* Exported read() symbol: forwards to swrap_read(). */
ssize_t read(int s, void *buf, size_t len)
{
	ssize_t nread = swrap_read(s, buf, len);

	return nread;
}
7025
7026 /****************************************************************************
7027  *   WRITE
7028  ***************************************************************************/
7029
7030 static ssize_t swrap_write(int s, const void *buf, size_t len)
7031 {
7032         struct msghdr msg;
7033         struct iovec tmp;
7034         struct sockaddr_un un_addr;
7035         ssize_t ret;
7036         int rc;
7037         struct socket_info *si;
7038
7039         si = find_socket_info(s);
7040         if (si == NULL) {
7041                 return libc_write(s, buf, len);
7042         }
7043
7044         tmp.iov_base = discard_const_p(char, buf);
7045         tmp.iov_len = len;
7046
7047         ZERO_STRUCT(msg);
7048         msg.msg_name = NULL;           /* optional address */
7049         msg.msg_namelen = 0;           /* size of address */
7050         msg.msg_iov = &tmp;            /* scatter/gather array */
7051         msg.msg_iovlen = 1;            /* # elements in msg_iov */
7052 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7053         msg.msg_control = NULL;        /* ancillary data, see below */
7054         msg.msg_controllen = 0;        /* ancillary data buffer len */
7055         msg.msg_flags = 0;             /* flags on received message */
7056 #endif
7057
7058         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
7059         if (rc < 0) {
7060                 return -1;
7061         }
7062
7063         buf = msg.msg_iov[0].iov_base;
7064         len = msg.msg_iov[0].iov_len;
7065
7066         ret = libc_write(s, buf, len);
7067
7068         swrap_sendmsg_after(s, si, &msg, NULL, ret);
7069
7070         return ret;
7071 }
7072
/* Exported write() symbol: forwards to swrap_write(). */
ssize_t write(int s, const void *buf, size_t len)
{
	ssize_t nwritten = swrap_write(s, buf, len);

	return nwritten;
}
7077
7078 /****************************************************************************
7079  *   SEND
7080  ***************************************************************************/
7081
7082 static ssize_t swrap_send(int s, const void *buf, size_t len, int flags)
7083 {
7084         struct msghdr msg;
7085         struct iovec tmp;
7086         struct sockaddr_un un_addr;
7087         ssize_t ret;
7088         int rc;
7089         struct socket_info *si = find_socket_info(s);
7090
7091         if (!si) {
7092                 return libc_send(s, buf, len, flags);
7093         }
7094
7095         tmp.iov_base = discard_const_p(char, buf);
7096         tmp.iov_len = len;
7097
7098         ZERO_STRUCT(msg);
7099         msg.msg_name = NULL;           /* optional address */
7100         msg.msg_namelen = 0;           /* size of address */
7101         msg.msg_iov = &tmp;            /* scatter/gather array */
7102         msg.msg_iovlen = 1;            /* # elements in msg_iov */
7103 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7104         msg.msg_control = NULL;        /* ancillary data, see below */
7105         msg.msg_controllen = 0;        /* ancillary data buffer len */
7106         msg.msg_flags = 0;             /* flags on received message */
7107 #endif
7108
7109         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
7110         if (rc < 0) {
7111                 return -1;
7112         }
7113
7114         buf = msg.msg_iov[0].iov_base;
7115         len = msg.msg_iov[0].iov_len;
7116
7117         ret = libc_send(s, buf, len, flags);
7118
7119         swrap_sendmsg_after(s, si, &msg, NULL, ret);
7120
7121         return ret;
7122 }
7123
/*
 * Public send() symbol provided by socket_wrapper.
 *
 * Delegates to swrap_send() and returns its result unchanged.
 */
ssize_t send(int s, const void *buf, size_t len, int flags)
{
        ssize_t nsent = swrap_send(s, buf, len, flags);

        return nsent;
}
7128
7129 /****************************************************************************
7130  *   RECVMSG
7131  ***************************************************************************/
7132
7133 static ssize_t swrap_recvmsg(int s, struct msghdr *omsg, int flags)
7134 {
7135         struct swrap_address from_addr = {
7136                 .sa_socklen = sizeof(struct sockaddr_un),
7137         };
7138         struct swrap_address convert_addr = {
7139                 .sa_socklen = sizeof(struct sockaddr_storage),
7140         };
7141         struct socket_info *si;
7142         struct msghdr msg;
7143         struct iovec tmp;
7144 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7145         size_t msg_ctrllen_filled;
7146         size_t msg_ctrllen_left;
7147 #endif
7148
7149         ssize_t ret;
7150         int rc;
7151
7152         si = find_socket_info(s);
7153         if (si == NULL) {
7154                 uint8_t *tmp_control = NULL;
7155                 rc = swrap_recvmsg_before_unix(omsg, &msg, &tmp_control);
7156                 if (rc < 0) {
7157                         return rc;
7158                 }
7159                 ret = libc_recvmsg(s, &msg, flags);
7160                 return swrap_recvmsg_after_unix(&msg, &tmp_control, omsg, ret);
7161         }
7162
7163         tmp.iov_base = NULL;
7164         tmp.iov_len = 0;
7165
7166         ZERO_STRUCT(msg);
7167         msg.msg_name = &from_addr.sa;              /* optional address */
7168         msg.msg_namelen = from_addr.sa_socklen;    /* size of address */
7169         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7170         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7171 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7172         msg_ctrllen_filled = 0;
7173         msg_ctrllen_left = omsg->msg_controllen;
7174
7175         msg.msg_control = omsg->msg_control;       /* ancillary data, see below */
7176         msg.msg_controllen = omsg->msg_controllen; /* ancillary data buffer len */
7177         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7178 #endif
7179
7180         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7181         if (rc < 0) {
7182                 return -1;
7183         }
7184
7185         ret = libc_recvmsg(s, &msg, flags);
7186
7187 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7188         msg_ctrllen_filled += msg.msg_controllen;
7189         msg_ctrllen_left -= msg.msg_controllen;
7190
7191         if (omsg->msg_control != NULL) {
7192                 uint8_t *p;
7193
7194                 p = omsg->msg_control;
7195                 p += msg_ctrllen_filled;
7196
7197                 msg.msg_control = p;
7198                 msg.msg_controllen = msg_ctrllen_left;
7199         } else {
7200                 msg.msg_control = NULL;
7201                 msg.msg_controllen = 0;
7202         }
7203 #endif
7204
7205         /*
7206          * We convert the unix address to a IP address so we need a buffer
7207          * which can store the address in case of SOCK_DGRAM, see below.
7208          */
7209         msg.msg_name = &convert_addr.sa;
7210         msg.msg_namelen = convert_addr.sa_socklen;
7211
7212         rc = swrap_recvmsg_after(s,
7213                                  si,
7214                                  &msg,
7215                                  &from_addr.sa.un,
7216                                  from_addr.sa_socklen,
7217                                  ret);
7218         if (rc != 0) {
7219                 return rc;
7220         }
7221
7222 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7223         if (omsg->msg_control != NULL) {
7224                 /* msg.msg_controllen = space left */
7225                 msg_ctrllen_left = msg.msg_controllen;
7226                 msg_ctrllen_filled = omsg->msg_controllen - msg_ctrllen_left;
7227         }
7228
7229         /* Update the original message length */
7230         omsg->msg_controllen = msg_ctrllen_filled;
7231         omsg->msg_flags = msg.msg_flags;
7232 #endif
7233         omsg->msg_iovlen = msg.msg_iovlen;
7234
7235         SWRAP_LOCK_SI(si);
7236
7237         /*
7238          * From the manpage:
7239          *
7240          * The  msg_name  field  points  to a caller-allocated buffer that is
7241          * used to return the source address if the socket is unconnected.  The
7242          * caller should set msg_namelen to the size of this buffer before this
7243          * call; upon return from a successful call, msg_name will contain the
7244          * length of the returned address.  If the application  does  not  need
7245          * to know the source address, msg_name can be specified as NULL.
7246          */
7247         if (si->type == SOCK_STREAM) {
7248                 omsg->msg_namelen = 0;
7249         } else if (omsg->msg_name != NULL &&
7250                    omsg->msg_namelen != 0 &&
7251                    omsg->msg_namelen >= msg.msg_namelen) {
7252                 memcpy(omsg->msg_name, msg.msg_name, msg.msg_namelen);
7253                 omsg->msg_namelen = msg.msg_namelen;
7254         }
7255
7256         SWRAP_UNLOCK_SI(si);
7257
7258         return ret;
7259 }
7260
/*
 * Public recvmsg() symbol provided by socket_wrapper.
 *
 * Delegates to swrap_recvmsg() and returns its result unchanged.
 */
ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags)
{
        ssize_t nread = swrap_recvmsg(sockfd, msg, flags);

        return nread;
}
7265
7266 /****************************************************************************
7267  *   RECVMMSG
7268  ***************************************************************************/
7269
7270 #ifdef HAVE_RECVMMSG
7271 #if defined(HAVE_RECVMMSG_SSIZE_T_CONST_TIMEOUT)
7272 /* FreeBSD */
7273 static ssize_t swrap_recvmmsg(int s, struct mmsghdr *omsgvec, size_t _vlen, int flags, const struct timespec *timeout)
7274 #elif defined(HAVE_RECVMMSG_CONST_TIMEOUT)
7275 /* Linux legacy glibc < 2.21 */
7276 static int swrap_recvmmsg(int s, struct mmsghdr *omsgvec, unsigned int _vlen, int flags, const struct timespec *timeout)
7277 #else
7278 /* Linux glibc >= 2.21 */
7279 static int swrap_recvmmsg(int s, struct mmsghdr *omsgvec, unsigned int _vlen, int flags, struct timespec *timeout)
7280 #endif
7281 {
7282         struct socket_info *si = find_socket_info(s);
7283 #define __SWRAP_RECVMMSG_MAX_VLEN 16
7284         struct mmsghdr msgvec[__SWRAP_RECVMMSG_MAX_VLEN] = {};
7285         struct {
7286                 struct iovec iov;
7287                 struct swrap_address from_addr;
7288                 struct swrap_address convert_addr;
7289 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7290                 size_t msg_ctrllen_filled;
7291                 size_t msg_ctrllen_left;
7292 #endif
7293         } tmp[__SWRAP_RECVMMSG_MAX_VLEN] = {};
7294         int vlen;
7295         int i;
7296         int ret;
7297         int rc;
7298         int saved_errno;
7299
7300         if (_vlen > __SWRAP_RECVMMSG_MAX_VLEN) {
7301                 vlen = __SWRAP_RECVMMSG_MAX_VLEN;
7302         } else {
7303                 vlen = _vlen;
7304         }
7305
7306         if (si == NULL) {
7307                 uint8_t *tmp_control[__SWRAP_RECVMMSG_MAX_VLEN] = { NULL, };
7308
7309                 for (i = 0; i < vlen; i++) {
7310                         struct msghdr *omsg = &omsgvec[i].msg_hdr;
7311                         struct msghdr *msg = &msgvec[i].msg_hdr;
7312
7313                         rc = swrap_recvmsg_before_unix(omsg, msg,
7314                                                        &tmp_control[i]);
7315                         if (rc < 0) {
7316                                 ret = rc;
7317                                 goto fail_libc;
7318                         }
7319                 }
7320
7321                 ret = libc_recvmmsg(s, msgvec, vlen, flags, timeout);
7322                 if (ret < 0) {
7323                         goto fail_libc;
7324                 }
7325
7326                 for (i = 0; i < ret; i++) {
7327                         omsgvec[i].msg_len = msgvec[i].msg_len;
7328                 }
7329
7330 fail_libc:
7331                 saved_errno = errno;
7332                 for (i = 0; i < vlen; i++) {
7333                         struct msghdr *omsg = &omsgvec[i].msg_hdr;
7334                         struct msghdr *msg = &msgvec[i].msg_hdr;
7335
7336                         if (i == 0 || i < ret) {
7337                                 swrap_recvmsg_after_unix(msg, &tmp_control[i], omsg, ret);
7338                         }
7339                         SAFE_FREE(tmp_control[i]);
7340                 }
7341                 errno = saved_errno;
7342
7343                 return ret;
7344         }
7345
7346         for (i = 0; i < vlen; i++) {
7347                 struct msghdr *omsg = &omsgvec[i].msg_hdr;
7348                 struct msghdr *msg = &msgvec[i].msg_hdr;
7349
7350                 tmp[i].from_addr.sa_socklen = sizeof(struct sockaddr_un);
7351                 tmp[i].convert_addr.sa_socklen = sizeof(struct sockaddr_storage);
7352
7353                 msg->msg_name = &tmp[i].from_addr.sa;              /* optional address */
7354                 msg->msg_namelen = tmp[i].from_addr.sa_socklen;    /* size of address */
7355                 msg->msg_iov = omsg->msg_iov;               /* scatter/gather array */
7356                 msg->msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7357 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7358                 tmp[i].msg_ctrllen_filled = 0;
7359                 tmp[i].msg_ctrllen_left = omsg->msg_controllen;
7360
7361                 msg->msg_control = omsg->msg_control;       /* ancillary data, see below */
7362                 msg->msg_controllen = omsg->msg_controllen; /* ancillary data buffer len */
7363                 msg->msg_flags = omsg->msg_flags;           /* flags on received message */
7364 #endif
7365
7366                 rc = swrap_recvmsg_before(s, si, msg, &tmp[i].iov);
7367                 if (rc < 0) {
7368                         ret = rc;
7369                         goto fail_swrap;
7370                 }
7371         }
7372
7373         ret = libc_recvmmsg(s, msgvec, vlen, flags, timeout);
7374         if (ret < 0) {
7375                 goto fail_swrap;
7376         }
7377
7378         for (i = 0; i < ret; i++) {
7379                 omsgvec[i].msg_len = msgvec[i].msg_len;
7380         }
7381
7382 fail_swrap:
7383
7384         saved_errno = errno;
7385         for (i = 0; i < vlen; i++) {
7386                 struct msghdr *omsg = &omsgvec[i].msg_hdr;
7387                 struct msghdr *msg = &msgvec[i].msg_hdr;
7388
7389                 if (!(i == 0 || i < ret)) {
7390                         break;
7391                 }
7392
7393 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7394                 tmp[i].msg_ctrllen_filled += msg->msg_controllen;
7395                 tmp[i].msg_ctrllen_left -= msg->msg_controllen;
7396
7397                 if (omsg->msg_control != NULL) {
7398                         uint8_t *p;
7399
7400                         p = omsg->msg_control;
7401                         p += tmp[i].msg_ctrllen_filled;
7402
7403                         msg->msg_control = p;
7404                         msg->msg_controllen = tmp[i].msg_ctrllen_left;
7405                 } else {
7406                         msg->msg_control = NULL;
7407                         msg->msg_controllen = 0;
7408                 }
7409 #endif
7410
7411                 /*
7412                  * We convert the unix address to a IP address so we need a buffer
7413                  * which can store the address in case of SOCK_DGRAM, see below.
7414                  */
7415                 msg->msg_name = &tmp[i].convert_addr.sa;
7416                 msg->msg_namelen = tmp[i].convert_addr.sa_socklen;
7417
7418                 swrap_recvmsg_after(s, si, msg,
7419                                     &tmp[i].from_addr.sa.un,
7420                                     tmp[i].from_addr.sa_socklen,
7421                                     ret);
7422
7423 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7424                 if (omsg->msg_control != NULL) {
7425                         /* msg->msg_controllen = space left */
7426                         tmp[i].msg_ctrllen_left = msg->msg_controllen;
7427                         tmp[i].msg_ctrllen_filled = omsg->msg_controllen - tmp[i].msg_ctrllen_left;
7428                 }
7429
7430                 /* Update the original message length */
7431                 omsg->msg_controllen = tmp[i].msg_ctrllen_filled;
7432                 omsg->msg_flags = msg->msg_flags;
7433 #endif
7434                 omsg->msg_iovlen = msg->msg_iovlen;
7435
7436                 SWRAP_LOCK_SI(si);
7437
7438                 /*
7439                  * From the manpage:
7440                  *
7441                  * The  msg_name  field  points  to a caller-allocated buffer that is
7442                  * used to return the source address if the socket is unconnected.  The
7443                  * caller should set msg_namelen to the size of this buffer before this
7444                  * call; upon return from a successful call, msg_name will contain the
7445                  * length of the returned address.  If the application  does  not  need
7446                  * to know the source address, msg_name can be specified as NULL.
7447                  */
7448                 if (si->type == SOCK_STREAM) {
7449                         omsg->msg_namelen = 0;
7450                 } else if (omsg->msg_name != NULL &&
7451                            omsg->msg_namelen != 0 &&
7452                            omsg->msg_namelen >= msg->msg_namelen) {
7453                         memcpy(omsg->msg_name, msg->msg_name, msg->msg_namelen);
7454                         omsg->msg_namelen = msg->msg_namelen;
7455                 }
7456
7457                 SWRAP_UNLOCK_SI(si);
7458         }
7459         errno = saved_errno;
7460
7461         return ret;
7462 }
7463
/*
 * Public recvmmsg() symbol provided by socket_wrapper.
 *
 * The prototype is selected at build time to match the platform's own
 * declaration; every variant forwards its arguments to swrap_recvmmsg()
 * and returns its result unchanged.
 */
#if defined(HAVE_RECVMMSG_SSIZE_T_CONST_TIMEOUT)
/* FreeBSD */
ssize_t recvmmsg(int sockfd, struct mmsghdr *msgvec, size_t vlen, int flags, const struct timespec *timeout)
#elif defined(HAVE_RECVMMSG_CONST_TIMEOUT)
/* Linux legacy glibc < 2.21 */
int recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags, const struct timespec *timeout)
#else
/* Linux glibc >= 2.21 */
int recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags, struct timespec *timeout)
#endif
{
        return swrap_recvmmsg(sockfd, msgvec, vlen, flags, timeout);
}
7477 #endif /* HAVE_RECVMMSG */
7478
7479 /****************************************************************************
7480  *   SENDMSG
7481  ***************************************************************************/
7482
7483 static ssize_t swrap_sendmsg(int s, const struct msghdr *omsg, int flags)
7484 {
7485         struct msghdr msg;
7486         struct iovec tmp;
7487         struct sockaddr_un un_addr;
7488         const struct sockaddr_un *to_un = NULL;
7489         const struct sockaddr *to = NULL;
7490         ssize_t ret;
7491         int rc;
7492         struct socket_info *si = find_socket_info(s);
7493         int bcast = 0;
7494
7495         if (!si) {
7496                 int scm_rights_pipe_fd = -1;
7497
7498                 rc = swrap_sendmsg_before_unix(omsg, &msg,
7499                                                &scm_rights_pipe_fd);
7500                 if (rc < 0) {
7501                         return rc;
7502                 }
7503                 ret = libc_sendmsg(s, &msg, flags);
7504                 return swrap_sendmsg_after_unix(&msg, ret, scm_rights_pipe_fd);
7505         }
7506
7507         ZERO_STRUCT(un_addr);
7508
7509         tmp.iov_base = NULL;
7510         tmp.iov_len = 0;
7511
7512         ZERO_STRUCT(msg);
7513
7514         SWRAP_LOCK_SI(si);
7515
7516         if (si->connected == 0) {
7517                 msg.msg_name = omsg->msg_name;             /* optional address */
7518                 msg.msg_namelen = omsg->msg_namelen;       /* size of address */
7519         }
7520         msg.msg_iov = omsg->msg_iov;               /* scatter/gather array */
7521         msg.msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7522
7523         SWRAP_UNLOCK_SI(si);
7524
7525 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7526         if (omsg != NULL && omsg->msg_controllen > 0 && omsg->msg_control != NULL) {
7527                 uint8_t *cmbuf = NULL;
7528                 size_t cmlen = 0;
7529
7530                 rc = swrap_sendmsg_filter_cmsghdr(omsg, &cmbuf, &cmlen);
7531                 if (rc < 0) {
7532                         return rc;
7533                 }
7534
7535                 if (cmlen == 0) {
7536                         msg.msg_controllen = 0;
7537                         msg.msg_control = NULL;
7538                 } else {
7539                         msg.msg_control = cmbuf;
7540                         msg.msg_controllen = cmlen;
7541                 }
7542         }
7543         msg.msg_flags = omsg->msg_flags;           /* flags on received message */
7544 #endif
7545         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, &to_un, &to, &bcast);
7546         if (rc < 0) {
7547                 int saved_errno = errno;
7548 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7549                 SAFE_FREE(msg.msg_control);
7550 #endif
7551                 errno = saved_errno;
7552                 return -1;
7553         }
7554
7555         if (bcast) {
7556                 struct stat st;
7557                 unsigned int iface;
7558                 unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
7559                 char type;
7560                 size_t i, len = 0;
7561                 uint8_t *buf;
7562                 off_t ofs = 0;
7563                 size_t avail = 0;
7564                 size_t remain;
7565                 char *swrap_dir = NULL;
7566
7567                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7568                         avail += msg.msg_iov[i].iov_len;
7569                 }
7570
7571                 len = avail;
7572                 remain = avail;
7573
7574                 /* we capture it as one single packet */
7575                 buf = (uint8_t *)malloc(remain);
7576                 if (!buf) {
7577                         int saved_errno = errno;
7578 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7579                         SAFE_FREE(msg.msg_control);
7580 #endif
7581                         errno = saved_errno;
7582                         return -1;
7583                 }
7584
7585                 for (i = 0; i < (size_t)msg.msg_iovlen; i++) {
7586                         size_t this_time = MIN(remain, (size_t)msg.msg_iov[i].iov_len);
7587                         memcpy(buf + ofs,
7588                                msg.msg_iov[i].iov_base,
7589                                this_time);
7590                         ofs += this_time;
7591                         remain -= this_time;
7592                 }
7593
7594                 type = SOCKET_TYPE_CHAR_UDP;
7595
7596                 swrap_dir = socket_wrapper_dir();
7597                 if (swrap_dir == NULL) {
7598                         int saved_errno = errno;
7599 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7600                         SAFE_FREE(msg.msg_control);
7601 #endif
7602                         SAFE_FREE(buf);
7603                         errno = saved_errno;
7604                         return -1;
7605                 }
7606
7607                 for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
7608                         swrap_un_path(&un_addr, swrap_dir, type, iface, prt);
7609                         if (stat(un_addr.sun_path, &st) != 0) continue;
7610
7611                         msg.msg_name = &un_addr;           /* optional address */
7612                         msg.msg_namelen = sizeof(un_addr); /* size of address */
7613
7614                         /* ignore the any errors in broadcast sends */
7615                         libc_sendmsg(s, &msg, flags);
7616                 }
7617
7618                 SAFE_FREE(swrap_dir);
7619
7620                 SWRAP_LOCK_SI(si);
7621
7622                 swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
7623                 free(buf);
7624
7625                 SWRAP_UNLOCK_SI(si);
7626
7627                 return len;
7628         }
7629
7630         ret = libc_sendmsg(s, &msg, flags);
7631
7632         swrap_sendmsg_after(s, si, &msg, to, ret);
7633
7634 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7635         {
7636                 int saved_errno = errno;
7637                 SAFE_FREE(msg.msg_control);
7638                 errno = saved_errno;
7639         }
7640 #endif
7641
7642         return ret;
7643 }
7644
/*
 * Public sendmsg() symbol provided by socket_wrapper.
 *
 * Delegates to swrap_sendmsg() and returns its result unchanged.
 */
ssize_t sendmsg(int s, const struct msghdr *omsg, int flags)
{
        ssize_t nsent = swrap_sendmsg(s, omsg, flags);

        return nsent;
}
7649
7650 /****************************************************************************
7651  *   SENDMMSG
7652  ***************************************************************************/
7653
7654 #ifdef HAVE_SENDMMSG
7655 #if defined(HAVE_SENDMMSG_SSIZE_T)
7656 /* FreeBSD */
7657 static ssize_t swrap_sendmmsg(int s, struct mmsghdr *omsgvec, size_t _vlen, int flags)
7658 #else
7659 /* Linux */
7660 static int swrap_sendmmsg(int s, struct mmsghdr *omsgvec, unsigned int _vlen, int flags)
7661 #endif
7662 {
7663         struct socket_info *si = find_socket_info(s);
7664 #define __SWRAP_SENDMMSG_MAX_VLEN 16
7665         struct mmsghdr msgvec[__SWRAP_SENDMMSG_MAX_VLEN] = {};
7666         struct {
7667                 struct iovec iov;
7668                 struct sockaddr_un un_addr;
7669                 const struct sockaddr_un *to_un;
7670                 const struct sockaddr *to;
7671                 int bcast;
7672         } tmp[__SWRAP_SENDMMSG_MAX_VLEN] = {};
7673         int vlen;
7674         int i;
7675         char *swrap_dir = NULL;
7676         int connected = 0;
7677         int found_bcast = 0;
7678         int ret;
7679         int rc;
7680         int saved_errno;
7681
7682         if (_vlen > __SWRAP_SENDMMSG_MAX_VLEN) {
7683                 vlen = __SWRAP_SENDMMSG_MAX_VLEN;
7684         } else {
7685                 vlen = _vlen;
7686         }
7687
7688         if (!si) {
7689                 int scm_rights_pipe_fd[__SWRAP_SENDMMSG_MAX_VLEN];
7690
7691                 for (i = 0; i < __SWRAP_SENDMMSG_MAX_VLEN; i++) {
7692                         scm_rights_pipe_fd[i] = -1;
7693                 }
7694
7695                 for (i = 0; i < vlen; i++) {
7696                         struct msghdr *omsg = &omsgvec[i].msg_hdr;
7697                         struct msghdr *msg = &msgvec[i].msg_hdr;
7698
7699                         rc = swrap_sendmsg_before_unix(omsg, msg,
7700                                                        &scm_rights_pipe_fd[i]);
7701                         if (rc < 0) {
7702                                 ret = rc;
7703                                 goto fail_libc;
7704                         }
7705                 }
7706
7707                 ret = libc_sendmmsg(s, msgvec, vlen, flags);
7708                 if (ret < 0) {
7709                         goto fail_libc;
7710                 }
7711
7712                 for (i = 0; i < ret; i++) {
7713                         omsgvec[i].msg_len = msgvec[i].msg_len;
7714                 }
7715
7716 fail_libc:
7717                 saved_errno = errno;
7718                 for (i = 0; i < vlen; i++) {
7719                         struct msghdr *msg = &msgvec[i].msg_hdr;
7720
7721                         swrap_sendmsg_after_unix(msg, ret,
7722                                                  scm_rights_pipe_fd[i]);
7723                 }
7724                 errno = saved_errno;
7725
7726                 return ret;
7727         }
7728
7729         SWRAP_LOCK_SI(si);
7730         connected = si->connected;
7731         SWRAP_UNLOCK_SI(si);
7732
7733         for (i = 0; i < vlen; i++) {
7734                 struct msghdr *omsg = &omsgvec[i].msg_hdr;
7735                 struct msghdr *msg = &msgvec[i].msg_hdr;
7736
7737                 if (connected == 0) {
7738                         msg->msg_name = omsg->msg_name;             /* optional address */
7739                         msg->msg_namelen = omsg->msg_namelen;       /* size of address */
7740                 }
7741                 msg->msg_iov = omsg->msg_iov;               /* scatter/gather array */
7742                 msg->msg_iovlen = omsg->msg_iovlen;         /* # elements in msg_iov */
7743
7744 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7745                 if (omsg->msg_controllen > 0 && omsg->msg_control != NULL) {
7746                         uint8_t *cmbuf = NULL;
7747                         size_t cmlen = 0;
7748
7749                         rc = swrap_sendmsg_filter_cmsghdr(omsg, &cmbuf, &cmlen);
7750                         if (rc < 0) {
7751                                 ret = rc;
7752                                 goto fail_swrap;
7753                         }
7754
7755                         if (cmlen != 0) {
7756                                 msg->msg_control = cmbuf;
7757                                 msg->msg_controllen = cmlen;
7758                         }
7759                 }
7760                 msg->msg_flags = omsg->msg_flags;           /* flags on received message */
7761 #endif
7762
7763                 rc = swrap_sendmsg_before(s, si, msg,
7764                                           &tmp[i].iov,
7765                                           &tmp[i].un_addr,
7766                                           &tmp[i].to_un,
7767                                           &tmp[i].to,
7768                                           &tmp[i].bcast);
7769                 if (rc < 0) {
7770                         ret = rc;
7771                         goto fail_swrap;
7772                 }
7773
7774                 if (tmp[i].bcast) {
7775                         found_bcast = 1;
7776                 }
7777         }
7778
7779         if (found_bcast) {
7780
7781                 swrap_dir = socket_wrapper_dir();
7782                 if (swrap_dir == NULL) {
7783                         ret = -1;
7784                         goto fail_swrap;
7785                 }
7786
7787                 for (i = 0; i < vlen; i++) {
7788                         struct msghdr *msg = &msgvec[i].msg_hdr;
7789                         struct sockaddr_un *un_addr = &tmp[i].un_addr;
7790                         const struct sockaddr *to = tmp[i].to;
7791                         struct stat st;
7792                         unsigned int iface;
7793                         unsigned int prt = ntohs(((const struct sockaddr_in *)(const void *)to)->sin_port);
7794                         char type;
7795                         size_t l, len = 0;
7796                         uint8_t *buf;
7797                         off_t ofs = 0;
7798                         size_t avail = 0;
7799                         size_t remain;
7800
7801                         for (l = 0; l < (size_t)msg->msg_iovlen; l++) {
7802                                 avail += msg->msg_iov[l].iov_len;
7803                         }
7804
7805                         len = avail;
7806                         remain = avail;
7807
7808                         /* we capture it as one single packet */
7809                         buf = (uint8_t *)malloc(remain);
7810                         if (!buf) {
7811                                 ret = -1;
7812                                 goto fail_swrap;
7813                         }
7814
7815                         for (l = 0; l < (size_t)msg->msg_iovlen; l++) {
7816                                 size_t this_time = MIN(remain, (size_t)msg->msg_iov[l].iov_len);
7817                                 memcpy(buf + ofs,
7818                                        msg->msg_iov[l].iov_base,
7819                                        this_time);
7820                                 ofs += this_time;
7821                                 remain -= this_time;
7822                         }
7823
7824                         type = SOCKET_TYPE_CHAR_UDP;
7825
7826                         for(iface=0; iface <= MAX_WRAPPED_INTERFACES; iface++) {
7827                                 swrap_un_path(un_addr, swrap_dir, type, iface, prt);
7828                                 if (stat(un_addr->sun_path, &st) != 0) continue;
7829
7830                                 msg->msg_name = un_addr;             /* optional address */
7831                                 msg->msg_namelen = sizeof(*un_addr); /* size of address */
7832
7833                                 /*
7834                                  * ignore the any errors in broadcast sends and
7835                                  * do a single sendmsg instead of sendmmsg
7836                                  */
7837                                 libc_sendmsg(s, msg, flags);
7838                         }
7839
7840                         SWRAP_LOCK_SI(si);
7841                         swrap_pcap_dump_packet(si, to, SWRAP_SENDTO, buf, len);
7842                         SWRAP_UNLOCK_SI(si);
7843
7844                         SAFE_FREE(buf);
7845
7846                         msgvec[i].msg_len = len;
7847                 }
7848
7849                 ret = vlen;
7850                 goto bcast_done;
7851         }
7852
7853         ret = libc_sendmmsg(s, msgvec, vlen, flags);
7854         if (ret < 0) {
7855                 goto fail_swrap;
7856         }
7857
7858 bcast_done:
7859         for (i = 0; i < ret; i++) {
7860                 omsgvec[i].msg_len = msgvec[i].msg_len;
7861         }
7862
7863 fail_swrap:
7864         saved_errno = errno;
7865         for (i = 0; i < vlen; i++) {
7866                 struct msghdr *msg = &msgvec[i].msg_hdr;
7867
7868                 if (i == 0 || i < ret) {
7869                         swrap_sendmsg_after(s, si, msg, tmp[i].to, ret);
7870                 }
7871 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7872                 SAFE_FREE(msg->msg_control);
7873 #endif
7874         }
7875         SAFE_FREE(swrap_dir);
7876         errno = saved_errno;
7877
7878         return ret;
7879 }
7880
/*
 * Public sendmmsg() symbol provided by socket_wrapper.
 *
 * The prototype is selected at build time to match the platform's own
 * declaration; both variants forward their arguments to swrap_sendmmsg()
 * and return its result unchanged.
 */
#if defined(HAVE_SENDMMSG_SSIZE_T)
/* FreeBSD */
ssize_t sendmmsg(int s, struct mmsghdr *msgvec, size_t vlen, int flags)
#else
/* Linux */
int sendmmsg(int s, struct mmsghdr *msgvec, unsigned int vlen, int flags)
#endif
{
        return swrap_sendmmsg(s, msgvec, vlen, flags);
}
7891 #endif /* HAVE_SENDMMSG */
7892
7893 /****************************************************************************
7894  *   READV
7895  ***************************************************************************/
7896
7897 static ssize_t swrap_readv(int s, const struct iovec *vector, int count)
7898 {
7899         struct socket_info *si;
7900         struct msghdr msg;
7901         struct iovec tmp;
7902         struct swrap_address saddr = {
7903                 .sa_socklen = sizeof(struct sockaddr_storage)
7904         };
7905         ssize_t ret;
7906         int rc;
7907
7908         si = find_socket_info(s);
7909         if (si == NULL) {
7910                 return libc_readv(s, vector, count);
7911         }
7912
7913         tmp.iov_base = NULL;
7914         tmp.iov_len = 0;
7915
7916         ZERO_STRUCT(msg);
7917         msg.msg_name = &saddr.sa.s; /* optional address */
7918         msg.msg_namelen = saddr.sa_socklen;      /* size of address */
7919         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7920         msg.msg_iovlen = count;        /* # elements in msg_iov */
7921 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7922         msg.msg_control = NULL;        /* ancillary data, see below */
7923         msg.msg_controllen = 0;        /* ancillary data buffer len */
7924         msg.msg_flags = 0;             /* flags on received message */
7925 #endif
7926
7927         rc = swrap_recvmsg_before(s, si, &msg, &tmp);
7928         if (rc < 0) {
7929                 if (rc == -ENOTSOCK) {
7930                         return libc_readv(s, vector, count);
7931                 }
7932                 return -1;
7933         }
7934
7935         ret = libc_readv(s, msg.msg_iov, msg.msg_iovlen);
7936
7937         rc = swrap_recvmsg_after(s, si, &msg, NULL, 0, ret);
7938         if (rc != 0) {
7939                 return rc;
7940         }
7941
7942         return ret;
7943 }
7944
/* Exported readv() symbol, forwards to swrap_readv(). */
ssize_t readv(int s, const struct iovec *vector, int count)
{
	ssize_t nread = swrap_readv(s, vector, count);

	return nread;
}
7949
7950 /****************************************************************************
7951  *   WRITEV
7952  ***************************************************************************/
7953
7954 static ssize_t swrap_writev(int s, const struct iovec *vector, int count)
7955 {
7956         struct msghdr msg;
7957         struct iovec tmp;
7958         struct sockaddr_un un_addr;
7959         ssize_t ret;
7960         int rc;
7961         struct socket_info *si = find_socket_info(s);
7962
7963         if (!si) {
7964                 return libc_writev(s, vector, count);
7965         }
7966
7967         tmp.iov_base = NULL;
7968         tmp.iov_len = 0;
7969
7970         ZERO_STRUCT(msg);
7971         msg.msg_name = NULL;           /* optional address */
7972         msg.msg_namelen = 0;           /* size of address */
7973         msg.msg_iov = discard_const_p(struct iovec, vector); /* scatter/gather array */
7974         msg.msg_iovlen = count;        /* # elements in msg_iov */
7975 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
7976         msg.msg_control = NULL;        /* ancillary data, see below */
7977         msg.msg_controllen = 0;        /* ancillary data buffer len */
7978         msg.msg_flags = 0;             /* flags on received message */
7979 #endif
7980
7981         rc = swrap_sendmsg_before(s, si, &msg, &tmp, &un_addr, NULL, NULL, NULL);
7982         if (rc < 0) {
7983                 if (rc == -ENOTSOCK) {
7984                         return libc_readv(s, vector, count);
7985                 }
7986                 return -1;
7987         }
7988
7989         ret = libc_writev(s, msg.msg_iov, msg.msg_iovlen);
7990
7991         swrap_sendmsg_after(s, si, &msg, NULL, ret);
7992
7993         return ret;
7994 }
7995
/* Exported writev() symbol, forwards to swrap_writev(). */
ssize_t writev(int s, const struct iovec *vector, int count)
{
	ssize_t nwritten = swrap_writev(s, vector, count);

	return nwritten;
}
8000
8001 /****************************
8002  * CLOSE
8003  ***************************/
8004
8005 static int swrap_remove_wrapper(const char *__func_name,
8006                                 int (*__close_fd_fn)(int fd),
8007                                 int fd)
8008 {
8009         struct socket_info *si = NULL;
8010         int si_index;
8011         int ret_errno = errno;
8012         int ret;
8013
8014         swrap_mutex_lock(&socket_reset_mutex);
8015
8016         si_index = find_socket_info_index(fd);
8017         if (si_index == -1) {
8018                 swrap_mutex_unlock(&socket_reset_mutex);
8019                 return __close_fd_fn(fd);
8020         }
8021
8022         swrap_log(SWRAP_LOG_TRACE, __func_name, "Remove wrapper for fd=%d", fd);
8023         reset_socket_info_index(fd);
8024
8025         si = swrap_get_socket_info(si_index);
8026
8027         swrap_mutex_lock(&first_free_mutex);
8028         SWRAP_LOCK_SI(si);
8029
8030         ret = __close_fd_fn(fd);
8031         if (ret == -1) {
8032                 ret_errno = errno;
8033         }
8034
8035         swrap_dec_refcount(si);
8036
8037         if (swrap_get_refcount(si) > 0) {
8038                 /* there are still references left */
8039                 goto out;
8040         }
8041
8042         if (si->fd_passed) {
8043                 goto set_next_free;
8044         }
8045
8046         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
8047                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_SEND, NULL, 0);
8048         }
8049
8050         if (si->myname.sa_socklen > 0 && si->peername.sa_socklen > 0) {
8051                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_RECV, NULL, 0);
8052                 swrap_pcap_dump_packet(si, NULL, SWRAP_CLOSE_ACK, NULL, 0);
8053         }
8054
8055         if (si->un_addr.sun_path[0] != '\0') {
8056                 unlink(si->un_addr.sun_path);
8057         }
8058
8059 set_next_free:
8060         swrap_set_next_free(si, first_free);
8061         first_free = si_index;
8062
8063 out:
8064         SWRAP_UNLOCK_SI(si);
8065         swrap_mutex_unlock(&first_free_mutex);
8066         swrap_mutex_unlock(&socket_reset_mutex);
8067
8068         errno = ret_errno;
8069         return ret;
8070 }
8071
/*
 * Close callback which leaves the file descriptor alone and
 * always reports success.
 */
static int swrap_noop_close(int fd)
{
	/* fd is intentionally unused */
	(void)fd;

	return 0;
}
8077
8078 static void swrap_remove_stale(int fd)
8079 {
8080         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
8081 }
8082
8083 /*
8084  * This allows socket_wrapper aware applications to
8085  * indicate that the given fd does not belong to
8086  * an inet socket.
8087  *
8088  * We already overload a lot of unrelated functions
8089  * like eventfd(), timerfd_create(), ... in order to
8090  * call swrap_remove_stale() on the returned fd, but
8091  * we'll never be able to handle all possible syscalls.
8092  *
8093  * socket_wrapper_indicate_no_inet_fd() gives them a way
8094  * to do the same.
8095  *
8096  * We don't export swrap_remove_stale() in order to
8097  * make it easier to analyze SOCKET_WRAPPER_DEBUGLEVEL=3
8098  * log files.
8099  */
8100 void socket_wrapper_indicate_no_inet_fd(int fd)
8101 {
8102         swrap_remove_wrapper(__func__, swrap_noop_close, fd);
8103 }
8104
8105 static int swrap_close(int fd)
8106 {
8107         return swrap_remove_wrapper(__func__, libc_close, fd);
8108 }
8109
/* Exported close() symbol, forwards to swrap_close(). */
int close(int fd)
{
	int rc = swrap_close(fd);

	return rc;
}
8114
8115 #ifdef HAVE___CLOSE_NOCANCEL
8116
8117 static int swrap___close_nocancel(int fd)
8118 {
8119         return swrap_remove_wrapper(__func__, libc___close_nocancel, fd);
8120 }
8121
/*
 * Exported __close_nocancel() symbol, forwards to
 * swrap___close_nocancel(). The prototype is declared right here,
 * directly in front of the definition.
 */
int __close_nocancel(int fd);
int __close_nocancel(int fd)
{
	int rc = swrap___close_nocancel(fd);

	return rc;
}
8127
8128 #endif /* HAVE___CLOSE_NOCANCEL */
8129
8130 /****************************
8131  * DUP
8132  ***************************/
8133
8134 static int swrap_dup(int fd)
8135 {
8136         struct socket_info *si;
8137         int dup_fd, idx;
8138
8139         idx = find_socket_info_index(fd);
8140         if (idx == -1) {
8141                 return libc_dup(fd);
8142         }
8143
8144         si = swrap_get_socket_info(idx);
8145
8146         dup_fd = libc_dup(fd);
8147         if (dup_fd == -1) {
8148                 int saved_errno = errno;
8149                 errno = saved_errno;
8150                 return -1;
8151         }
8152
8153         if ((size_t)dup_fd >= socket_fds_max) {
8154                 SWRAP_LOG(SWRAP_LOG_ERROR,
8155                           "The max socket index limit of %zu has been reached, "
8156                           "trying to add %d",
8157                           socket_fds_max,
8158                           dup_fd);
8159                 libc_close(dup_fd);
8160                 errno = EMFILE;
8161                 return -1;
8162         }
8163
8164         SWRAP_LOCK_SI(si);
8165
8166         swrap_inc_refcount(si);
8167
8168         SWRAP_UNLOCK_SI(si);
8169
8170         /* Make sure we don't have an entry for the fd */
8171         swrap_remove_stale(dup_fd);
8172
8173         set_socket_info_index(dup_fd, idx);
8174
8175         return dup_fd;
8176 }
8177
/* Exported dup() symbol, forwards to swrap_dup(). */
int dup(int fd)
{
	int new_fd = swrap_dup(fd);

	return new_fd;
}
8182
8183 /****************************
8184  * DUP2
8185  ***************************/
8186
8187 static int swrap_dup2(int fd, int newfd)
8188 {
8189         struct socket_info *si;
8190         int dup_fd, idx;
8191
8192         idx = find_socket_info_index(fd);
8193         if (idx == -1) {
8194                 return libc_dup2(fd, newfd);
8195         }
8196
8197         si = swrap_get_socket_info(idx);
8198
8199         if (fd == newfd) {
8200                 /*
8201                  * According to the manpage:
8202                  *
8203                  * "If oldfd is a valid file descriptor, and newfd has the same
8204                  * value as oldfd, then dup2() does nothing, and returns newfd."
8205                  */
8206                 return newfd;
8207         }
8208
8209         if ((size_t)newfd >= socket_fds_max) {
8210                 SWRAP_LOG(SWRAP_LOG_ERROR,
8211                           "The max socket index limit of %zu has been reached, "
8212                           "trying to add %d",
8213                           socket_fds_max,
8214                           newfd);
8215                 errno = EMFILE;
8216                 return -1;
8217         }
8218
8219         if (find_socket_info(newfd)) {
8220                 /* dup2() does an implicit close of newfd, which we
8221                  * need to emulate */
8222                 swrap_close(newfd);
8223         }
8224
8225         dup_fd = libc_dup2(fd, newfd);
8226         if (dup_fd == -1) {
8227                 int saved_errno = errno;
8228                 errno = saved_errno;
8229                 return -1;
8230         }
8231
8232         SWRAP_LOCK_SI(si);
8233
8234         swrap_inc_refcount(si);
8235
8236         SWRAP_UNLOCK_SI(si);
8237
8238         /* Make sure we don't have an entry for the fd */
8239         swrap_remove_stale(dup_fd);
8240
8241         set_socket_info_index(dup_fd, idx);
8242
8243         return dup_fd;
8244 }
8245
/* Exported dup2() symbol, forwards to swrap_dup2(). */
int dup2(int fd, int newfd)
{
	int rc = swrap_dup2(fd, newfd);

	return rc;
}
8250
8251 /****************************
8252  * FCNTL
8253  ***************************/
8254
8255 static int swrap_vfcntl(int fd, int cmd, va_list va)
8256 {
8257         struct socket_info *si;
8258         int rc, dup_fd, idx;
8259
8260         idx = find_socket_info_index(fd);
8261         if (idx == -1) {
8262                 return libc_vfcntl(fd, cmd, va);
8263         }
8264
8265         si = swrap_get_socket_info(idx);
8266
8267         switch (cmd) {
8268         case F_DUPFD:
8269                 dup_fd = libc_vfcntl(fd, cmd, va);
8270                 if (dup_fd == -1) {
8271                         int saved_errno = errno;
8272                         errno = saved_errno;
8273                         return -1;
8274                 }
8275
8276                 /* Make sure we don't have an entry for the fd */
8277                 swrap_remove_stale(dup_fd);
8278
8279                 if ((size_t)dup_fd >= socket_fds_max) {
8280                         SWRAP_LOG(SWRAP_LOG_ERROR,
8281                           "The max socket index limit of %zu has been reached, "
8282                           "trying to add %d",
8283                           socket_fds_max,
8284                           dup_fd);
8285                         libc_close(dup_fd);
8286                         errno = EMFILE;
8287                         return -1;
8288                 }
8289
8290                 SWRAP_LOCK_SI(si);
8291
8292                 swrap_inc_refcount(si);
8293
8294                 SWRAP_UNLOCK_SI(si);
8295
8296
8297                 set_socket_info_index(dup_fd, idx);
8298
8299                 rc = dup_fd;
8300                 break;
8301         default:
8302                 rc = libc_vfcntl(fd, cmd, va);
8303                 break;
8304         }
8305
8306         return rc;
8307 }
8308
/*
 * Exported fcntl() symbol, packs the variable arguments into
 * a va_list and forwards to swrap_vfcntl().
 */
int fcntl(int fd, int cmd, ...)
{
	va_list ap;
	int result;

	va_start(ap, cmd);
	result = swrap_vfcntl(fd, cmd, ap);
	va_end(ap);

	return result;
}
8322
8323 /****************************
8324  * EVENTFD
8325  ***************************/
8326
8327 #ifdef HAVE_EVENTFD
/*
 * eventfd() implementation: create the descriptor with libc_eventfd()
 * and, on success, drop any stale entry recorded for the returned
 * fd number.
 */
static int swrap_eventfd(int count, int flags)
{
	int efd = libc_eventfd(count, flags);

	if (efd == -1) {
		return -1;
	}

	swrap_remove_stale(efd);

	return efd;
}
8339
/*
 * Exported eventfd() symbol, forwards to swrap_eventfd().
 * The type of count depends on the platform prototype.
 */
#ifndef HAVE_EVENTFD_UNSIGNED_INT
int eventfd(int count, int flags)
#else
int eventfd(unsigned int count, int flags)
#endif
{
	return swrap_eventfd(count, flags);
}
8348 #endif
8349
8350 #ifdef HAVE_PLEDGE
/*
 * pledge() replacement which ignores both arguments
 * and always reports success.
 */
int pledge(const char *promises, const char *paths[])
{
	/* neither argument is used */
	(void)paths;
	(void)promises;

	return 0;
}
8358 #endif /* HAVE_PLEDGE */
8359
8360 static void swrap_thread_prepare(void)
8361 {
8362         /*
8363          * This function should only be called here!!
8364          *
8365          * We bind all symobls to avoid deadlocks of the fork is
8366          * interrupted by a signal handler using a symbol of this
8367          * library.
8368          */
8369         swrap_bind_symbol_all();
8370
8371         SWRAP_LOCK_ALL;
8372 }
8373
8374 static void swrap_thread_parent(void)
8375 {
8376         SWRAP_UNLOCK_ALL;
8377 }
8378
8379 static void swrap_thread_child(void)
8380 {
8381         SWRAP_REINIT_ALL;
8382 }
8383
8384 /****************************
8385  * CONSTRUCTOR
8386  ***************************/
8387 void swrap_constructor(void)
8388 {
8389         if (PIPE_BUF < sizeof(struct swrap_unix_scm_rights)) {
8390                 SWRAP_LOG(SWRAP_LOG_ERROR,
8391                           "PIPE_BUF=%zu < "
8392                           "sizeof(struct swrap_unix_scm_rights)=%zu\n"
8393                           "sizeof(struct swrap_unix_scm_rights_payload)=%zu "
8394                           "sizeof(struct socket_info)=%zu",
8395                           (size_t)PIPE_BUF,
8396                           sizeof(struct swrap_unix_scm_rights),
8397                           sizeof(struct swrap_unix_scm_rights_payload),
8398                           sizeof(struct socket_info));
8399                 exit(-1);
8400         }
8401
8402         SWRAP_REINIT_ALL;
8403
8404         /*
8405         * If we hold a lock and the application forks, then the child
8406         * is not able to unlock the mutex and we are in a deadlock.
8407         * This should prevent such deadlocks.
8408         */
8409         pthread_atfork(&swrap_thread_prepare,
8410                        &swrap_thread_parent,
8411                        &swrap_thread_child);
8412 }
8413
8414 /****************************
8415  * DESTRUCTOR
8416  ***************************/
8417
8418 /*
8419  * This function is called when the library is unloaded and makes sure that
8420  * sockets get closed and the unix file for the socket are unlinked.
8421  */
8422 void swrap_destructor(void)
8423 {
8424         size_t i;
8425
8426         if (socket_fds_idx != NULL) {
8427                 for (i = 0; i < socket_fds_max; ++i) {
8428                         if (socket_fds_idx[i] != -1) {
8429                                 swrap_close(i);
8430                         }
8431                 }
8432                 SAFE_FREE(socket_fds_idx);
8433         }
8434
8435         SAFE_FREE(sockets);
8436
8437         if (swrap.libc.handle != NULL
8438 #ifdef RTLD_NEXT
8439             && swrap.libc.handle != RTLD_NEXT
8440 #endif
8441                         ) {
8442                 dlclose(swrap.libc.handle);
8443         }
8444         if (swrap.libc.socket_handle
8445 #ifdef RTLD_NEXT
8446             && swrap.libc.socket_handle != RTLD_NEXT
8447 #endif
8448                         ) {
8449                 dlclose(swrap.libc.socket_handle);
8450         }
8451 }
8452
#if defined(HAVE__SOCKET) && defined(HAVE__CLOSE)
/*
 * On FreeBSD 12 (and maybe other platforms) system libraries
 * like libresolv prefix their syscalls with '_' in order to
 * always use the symbols from libc.
 *
 * For the interaction with resolv_wrapper we need to inject
 * socket_wrapper into libresolv, which means we need to provide
 * all socket related syscalls with the '_' prefix as well.
 *
 * This is tested in Samba's 'make test'. There we noticed that
 * providing '_read', '_open' and '_close' would cause errors,
 * so we skip '_read', '_write' and all non socket related calls
 * without further analyzing the problem.
 */

/* Declare __aliassym as an alias with the same type as __sym */
#define SWRAP_SYMBOL_ALIAS(__sym, __aliassym) \
	extern typeof(__sym) __aliassym __attribute__ ((alias(#__sym)))

/* socket creation and connection setup */
SWRAP_SYMBOL_ALIAS(socket, _socket);
SWRAP_SYMBOL_ALIAS(socketpair, _socketpair);
SWRAP_SYMBOL_ALIAS(bind, _bind);
SWRAP_SYMBOL_ALIAS(listen, _listen);
SWRAP_SYMBOL_ALIAS(connect, _connect);
SWRAP_SYMBOL_ALIAS(accept, _accept);
#ifdef HAVE_ACCEPT4
SWRAP_SYMBOL_ALIAS(accept4, _accept4);
#endif

/* addresses and options */
SWRAP_SYMBOL_ALIAS(getpeername, _getpeername);
SWRAP_SYMBOL_ALIAS(getsockname, _getsockname);
SWRAP_SYMBOL_ALIAS(getsockopt, _getsockopt);
SWRAP_SYMBOL_ALIAS(setsockopt, _setsockopt);

/* receiving */
SWRAP_SYMBOL_ALIAS(recv, _recv);
SWRAP_SYMBOL_ALIAS(recvfrom, _recvfrom);
SWRAP_SYMBOL_ALIAS(recvmsg, _recvmsg);
SWRAP_SYMBOL_ALIAS(readv, _readv);

/* sending */
SWRAP_SYMBOL_ALIAS(send, _send);
SWRAP_SYMBOL_ALIAS(sendto, _sendto);
SWRAP_SYMBOL_ALIAS(sendmsg, _sendmsg);
SWRAP_SYMBOL_ALIAS(writev, _writev);

/* file descriptor handling */
SWRAP_SYMBOL_ALIAS(dup, _dup);
SWRAP_SYMBOL_ALIAS(dup2, _dup2);
SWRAP_SYMBOL_ALIAS(fcntl, _fcntl);
SWRAP_SYMBOL_ALIAS(ioctl, _ioctl);

#endif /* HAVE__SOCKET && HAVE__CLOSE */