lib/replace: make sure krb5_cc_default[_name]() is no longer used directly
[samba.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tsocket
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29 #include "lib/util/iov_buf.h"
30 #include "lib/util/blocking.h"
31 #include "lib/util/util_net.h"
32 #include "lib/util/samba_util.h"
33
/*
 * Turn the result of a system call into an errno-style error code.
 *
 * ret       - return value of the system call
 * sys_errno - errno as it was right after the call
 * retry     - set to true when the failure is transient and the
 *             operation may simply be attempted again, false otherwise
 *
 * Returns 0 if ret signals success (>= 0), EIO if the failure is
 * malformed (ret is negative but not -1, or sys_errno is 0), and
 * sys_errno in every other case.
 */
static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	bool transient;

	*retry = false;

	if (ret >= 0) {
		return 0;
	}

	if ((ret != -1) || (sys_errno == 0)) {
		return EIO;
	}

	/*
	 * ENOMEM is retryable on Solaris/illumos, and possibly other
	 * systems, so it is listed next to the classic transient codes.
	 */
	transient = (sys_errno == EINTR) ||
		    (sys_errno == EINPROGRESS) ||
		    (sys_errno == EAGAIN) ||
		    (sys_errno == ENOMEM);

#ifdef EWOULDBLOCK
	if (sys_errno == EWOULDBLOCK) {
		transient = true;
	}
#endif

	*retry = transient;
	return sys_errno;
}
82
/*
 * Prepare a file descriptor for use as a tsocket.
 *
 * If high_fd is set, the descriptor is dup()ed until its number is
 * >= 3 and the low-numbered originals are closed. The descriptor is
 * then switched to non-blocking mode and marked close-on-exec.
 *
 * Returns the (possibly renumbered) descriptor, or -1 on failure.
 * On failure after the initial -1 check the descriptor is closed and
 * errno holds the error of the step that failed.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
	int low_fds[3];
	int num_low = 0;
	int saved_errno = 0;
	int idx;

	if (fd == -1) {
		return -1;
	}

	if (high_fd) {
		/* keep dup()ing until we are out of the stdio range */
		while (fd < 3) {
			low_fds[num_low] = fd;
			num_low += 1;
			fd = dup(fd);
			if (fd == -1) {
				saved_errno = errno;
				break;
			}
		}

		/* the low descriptors are dropped in any case */
		for (idx = 0; idx < num_low; idx++) {
			close(low_fds[idx]);
		}

		if (fd == -1) {
			/* close() may have clobbered errno */
			errno = saved_errno;
			return -1;
		}
	}

	if ((set_blocking(fd, false) == -1) ||
	    !smb_set_close_on_exec(fd)) {
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
		return -1;
	}

	return fd;
}
136
#ifdef HAVE_LINUX_RTNETLINK_H
/**
 * Report the amount of pending bytes on a netlink socket.
 *
 * Netlink sockets don't support querying the amount of pending data
 * via ioctl with FIONREAD, which is what tsocket_bsd_pending() uses.
 *
 * Using netlink implies we are on Linux, where recvmsg() has a working
 * MSG_TRUNC flag; it is combined with MSG_PEEK and a one byte buffer.
 *
 * Returns whatever recvmsg() returns (-1 with errno set on error).
 **/
static ssize_t tsocket_bsd_netlink_pending(int fd)
{
	char probe[1];
	struct iovec vec = {
		.iov_base = probe,
		.iov_len = sizeof(probe),
	};
	struct msghdr hdr = {
		.msg_iov = &vec,
		.msg_iovlen = 1,
	};

	return recvmsg(fd, &hdr, MSG_PEEK | MSG_TRUNC);
}
#else
/*
 * Fallback for builds without <linux/rtnetlink.h>: always fails
 * with ENOSYS.
 */
static ssize_t tsocket_bsd_netlink_pending(int fd)
{
	errno = ENOSYS;
	return -1;
}
#endif
173
/*
 * Report the number of bytes that can be read from fd.
 *
 * The count is obtained via ioctl(FIONREAD). When it is zero the
 * result of samba_socket_poll_or_sock_error() is returned instead.
 *
 * Returns the pending byte count, or -1 with errno set on error.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
	int pending = 0;
	int rc = ioctl(fd, FIONREAD, &pending);

	if (rc == -1) {
		return -1;
	}

	if (rc != 0) {
		/* ioctl() returned something unexpected */
		errno = EIO;
		return -1;
	}

	if (pending == 0) {
		return samba_socket_poll_or_sock_error(fd);
	}

	return pending;
}
196
197 static const struct tsocket_address_ops tsocket_address_bsd_ops;
198
199 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
200                                        const struct sockaddr *sa,
201                                        size_t sa_socklen,
202                                        struct tsocket_address **_addr,
203                                        const char *location)
204 {
205         struct tsocket_address *addr;
206         struct samba_sockaddr *bsda = NULL;
207
208         if (sa_socklen < sizeof(sa->sa_family)) {
209                 errno = EINVAL;
210                 return -1;
211         }
212
213         switch (sa->sa_family) {
214         case AF_UNIX:
215                 if (sa_socklen > sizeof(struct sockaddr_un)) {
216                         sa_socklen = sizeof(struct sockaddr_un);
217                 }
218                 break;
219         case AF_INET:
220                 if (sa_socklen < sizeof(struct sockaddr_in)) {
221                         errno = EINVAL;
222                         return -1;
223                 }
224                 sa_socklen = sizeof(struct sockaddr_in);
225                 break;
226 #ifdef HAVE_IPV6
227         case AF_INET6:
228                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
229                         errno = EINVAL;
230                         return -1;
231                 }
232                 sa_socklen = sizeof(struct sockaddr_in6);
233                 break;
234 #endif
235         default:
236                 errno = EAFNOSUPPORT;
237                 return -1;
238         }
239
240         if (sa_socklen > sizeof(struct sockaddr_storage)) {
241                 errno = EINVAL;
242                 return -1;
243         }
244
245         addr = tsocket_address_create(mem_ctx,
246                                       &tsocket_address_bsd_ops,
247                                       &bsda,
248                                       struct samba_sockaddr,
249                                       location);
250         if (!addr) {
251                 errno = ENOMEM;
252                 return -1;
253         }
254
255         ZERO_STRUCTP(bsda);
256
257         memcpy(&bsda->u.ss, sa, sa_socklen);
258
259         bsda->sa_socklen = sa_socklen;
260 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
261         bsda->u.sa.sa_len = bsda->sa_socklen;
262 #endif
263
264         *_addr = addr;
265         return 0;
266 }
267
268 int _tsocket_address_bsd_from_samba_sockaddr(TALLOC_CTX *mem_ctx,
269                                          const struct samba_sockaddr *xs_addr,
270                                          struct tsocket_address **t_addr,
271                                          const char *location)
272 {
273         return _tsocket_address_bsd_from_sockaddr(mem_ctx,
274                                                   &xs_addr->u.sa,
275                                                   xs_addr->sa_socklen,
276                                                   t_addr,
277                                                   location);
278 }
279
280 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
281                                      struct sockaddr *sa,
282                                      size_t sa_socklen)
283 {
284         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
285                                            struct samba_sockaddr);
286
287         if (!bsda) {
288                 errno = EINVAL;
289                 return -1;
290         }
291
292         if (sa_socklen < bsda->sa_socklen) {
293                 errno = EINVAL;
294                 return -1;
295         }
296
297         if (sa_socklen > bsda->sa_socklen) {
298                 memset(sa, 0, sa_socklen);
299                 sa_socklen = bsda->sa_socklen;
300         }
301
302         memcpy(sa, &bsda->u.ss, sa_socklen);
303 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
304         sa->sa_len = sa_socklen;
305 #endif
306         return sa_socklen;
307 }
308
309 bool tsocket_address_is_inet(const struct tsocket_address *addr, const char *fam)
310 {
311         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
312                                            struct samba_sockaddr);
313
314         if (!bsda) {
315                 return false;
316         }
317
318         switch (bsda->u.sa.sa_family) {
319         case AF_INET:
320                 if (strcasecmp(fam, "ip") == 0) {
321                         return true;
322                 }
323
324                 if (strcasecmp(fam, "ipv4") == 0) {
325                         return true;
326                 }
327
328                 return false;
329 #ifdef HAVE_IPV6
330         case AF_INET6:
331                 if (strcasecmp(fam, "ip") == 0) {
332                         return true;
333                 }
334
335                 if (strcasecmp(fam, "ipv6") == 0) {
336                         return true;
337                 }
338
339                 return false;
340 #endif
341         }
342
343         return false;
344 }
345
346 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
347                                        const char *fam,
348                                        const char *addr,
349                                        uint16_t port,
350                                        struct tsocket_address **_addr,
351                                        const char *location)
352 {
353         struct addrinfo hints;
354         struct addrinfo *result = NULL;
355         char port_str[6];
356         int ret;
357
358         ZERO_STRUCT(hints);
359         /*
360          * we use SOCKET_STREAM here to get just one result
361          * back from getaddrinfo().
362          */
363         hints.ai_socktype = SOCK_STREAM;
364         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
365
366         if (strcasecmp(fam, "ip") == 0) {
367                 hints.ai_family = AF_UNSPEC;
368                 if (!addr) {
369 #ifdef HAVE_IPV6
370                         addr = "::";
371 #else
372                         addr = "0.0.0.0";
373 #endif
374                 }
375         } else if (strcasecmp(fam, "ipv4") == 0) {
376                 hints.ai_family = AF_INET;
377                 if (!addr) {
378                         addr = "0.0.0.0";
379                 }
380 #ifdef HAVE_IPV6
381         } else if (strcasecmp(fam, "ipv6") == 0) {
382                 hints.ai_family = AF_INET6;
383                 if (!addr) {
384                         addr = "::";
385                 }
386 #endif
387         } else {
388                 errno = EAFNOSUPPORT;
389                 return -1;
390         }
391
392         snprintf(port_str, sizeof(port_str), "%u", port);
393
394         ret = getaddrinfo(addr, port_str, &hints, &result);
395         if (ret != 0) {
396                 switch (ret) {
397                 case EAI_FAIL:
398                 case EAI_NONAME:
399 #ifdef EAI_ADDRFAMILY
400                 case EAI_ADDRFAMILY:
401 #endif
402                         errno = EINVAL;
403                         break;
404                 }
405                 ret = -1;
406                 goto done;
407         }
408
409         if (result->ai_socktype != SOCK_STREAM) {
410                 errno = EINVAL;
411                 ret = -1;
412                 goto done;
413         }
414
415         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
416                                                   result->ai_addr,
417                                                   result->ai_addrlen,
418                                                   _addr,
419                                                   location);
420
421 done:
422         if (result) {
423                 freeaddrinfo(result);
424         }
425         return ret;
426 }
427
428 int _tsocket_address_inet_from_hostport_strings(TALLOC_CTX *mem_ctx,
429                                                 const char *fam,
430                                                 const char *host_port_addr,
431                                                 uint16_t default_port,
432                                                 struct tsocket_address **_addr,
433                                                 const char *location)
434 {
435         char *pl_sq = NULL;
436         char *pr_sq = NULL;
437         char *pl_period = NULL;
438         char *port_sep = NULL;
439         char *cport = NULL;
440         char *buf = NULL;
441         uint64_t port = 0;
442         int ret;
443         char *s_addr = NULL;
444         uint16_t s_port = default_port;
445         bool conv_ret;
446         bool is_ipv6_by_squares = false;
447
448         if (host_port_addr == NULL) {
449                 /* got straight to next function if host_port_addr is NULL */
450                 goto get_addr;
451         }
452         buf = talloc_strdup(mem_ctx, host_port_addr);
453         if (buf == NULL) {
454                 errno = ENOMEM;
455                 return -1;
456         }
457         pl_period = strchr_m(buf, '.');
458         port_sep = strrchr_m(buf, ':');
459         pl_sq = strchr_m(buf, '[');
460         pr_sq = strrchr_m(buf, ']');
461         /* See if its IPv4 or IPv6 */
462         /* Only parse IPv6 with squares with/without port, and IPv4 with port */
463         /* Everything else, let tsocket_address_inet_from string() */
464         /* find parsing errors */
465 #ifdef HAVE_IPV6
466         is_ipv6_by_squares = (pl_sq != NULL && pr_sq != NULL && pr_sq > pl_sq);
467 #endif
468         if (is_ipv6_by_squares) {
469                 /* IPv6 possibly with port - squares detected */
470                 port_sep = pr_sq + 1;
471                 if (*port_sep == '\0') {
472                         s_addr = pl_sq + 1;
473                         *pr_sq = 0;
474                         s_port = default_port;
475                         goto get_addr;
476                 }
477                 if (*port_sep != ':') {
478                         errno = EINVAL;
479                         return -1;
480                 }
481                 cport = port_sep + 1;
482                 conv_ret = conv_str_u64(cport, &port);
483                 if (!conv_ret) {
484                         errno = EINVAL;
485                         return -1;
486                 }
487                 if (port > 65535) {
488                         errno = EINVAL;
489                         return -1;
490                 }
491                 s_port = (uint16_t)port;
492                 *port_sep = 0;
493                 *pr_sq = 0;
494                 s_addr = pl_sq + 1;
495                 *pl_sq = 0;
496                 goto get_addr;
497         } else if (pl_period != NULL && port_sep != NULL) {
498                 /* IPv4 with port - more than one period in string */
499                 cport = port_sep + 1;
500                 conv_ret = conv_str_u64(cport, &port);
501                 if (!conv_ret) {
502                         errno = EINVAL;
503                         return -1;
504                 }
505                 if (port > 65535) {
506                         errno = EINVAL;
507                         return -1;
508                 }
509                 s_port = (uint16_t)port;
510                 *port_sep = 0;
511                 s_addr = buf;
512                 goto get_addr;
513         } else {
514                 /* Everything else, let tsocket_address_inet_from string() */
515                 /* find parsing errors */
516                 s_addr = buf;
517                 s_port = default_port;
518                 goto get_addr;
519         }
520 get_addr:
521         ret = _tsocket_address_inet_from_strings(
522             mem_ctx, fam, s_addr, s_port, _addr, location);
523
524         return ret;
525 }
526
527 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
528                                        TALLOC_CTX *mem_ctx)
529 {
530         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
531                                            struct samba_sockaddr);
532         char addr_str[INET6_ADDRSTRLEN+1];
533         const char *str;
534
535         if (!bsda) {
536                 errno = EINVAL;
537                 return NULL;
538         }
539
540         switch (bsda->u.sa.sa_family) {
541         case AF_INET:
542                 str = inet_ntop(bsda->u.in.sin_family,
543                                 &bsda->u.in.sin_addr,
544                                 addr_str, sizeof(addr_str));
545                 break;
546 #ifdef HAVE_IPV6
547         case AF_INET6:
548                 str = inet_ntop(bsda->u.in6.sin6_family,
549                                 &bsda->u.in6.sin6_addr,
550                                 addr_str, sizeof(addr_str));
551                 break;
552 #endif
553         default:
554                 errno = EINVAL;
555                 return NULL;
556         }
557
558         if (!str) {
559                 return NULL;
560         }
561
562         return talloc_strdup(mem_ctx, str);
563 }
564
565 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
566 {
567         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
568                                            struct samba_sockaddr);
569         uint16_t port = 0;
570
571         if (!bsda) {
572                 errno = EINVAL;
573                 return 0;
574         }
575
576         switch (bsda->u.sa.sa_family) {
577         case AF_INET:
578                 port = ntohs(bsda->u.in.sin_port);
579                 break;
580 #ifdef HAVE_IPV6
581         case AF_INET6:
582                 port = ntohs(bsda->u.in6.sin6_port);
583                 break;
584 #endif
585         default:
586                 errno = EINVAL;
587                 return 0;
588         }
589
590         return port;
591 }
592
593 int tsocket_address_inet_set_port(struct tsocket_address *addr,
594                                   uint16_t port)
595 {
596         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
597                                            struct samba_sockaddr);
598
599         if (!bsda) {
600                 errno = EINVAL;
601                 return -1;
602         }
603
604         switch (bsda->u.sa.sa_family) {
605         case AF_INET:
606                 bsda->u.in.sin_port = htons(port);
607                 break;
608 #ifdef HAVE_IPV6
609         case AF_INET6:
610                 bsda->u.in6.sin6_port = htons(port);
611                 break;
612 #endif
613         default:
614                 errno = EINVAL;
615                 return -1;
616         }
617
618         return 0;
619 }
620
621 bool tsocket_address_is_unix(const struct tsocket_address *addr)
622 {
623         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
624                                            struct samba_sockaddr);
625
626         if (!bsda) {
627                 return false;
628         }
629
630         switch (bsda->u.sa.sa_family) {
631         case AF_UNIX:
632                 return true;
633         }
634
635         return false;
636 }
637
638 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
639                                     const char *path,
640                                     struct tsocket_address **_addr,
641                                     const char *location)
642 {
643         struct sockaddr_un un;
644         void *p = &un;
645         int ret;
646
647         if (!path) {
648                 path = "";
649         }
650
651         if (strlen(path) > sizeof(un.sun_path)-1) {
652                 errno = ENAMETOOLONG;
653                 return -1;
654         }
655
656         ZERO_STRUCT(un);
657         un.sun_family = AF_UNIX;
658         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
659
660         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
661                                                  (struct sockaddr *)p,
662                                                  sizeof(un),
663                                                  _addr,
664                                                  location);
665
666         return ret;
667 }
668
669 char *tsocket_address_unix_path(const struct tsocket_address *addr,
670                                 TALLOC_CTX *mem_ctx)
671 {
672         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
673                                            struct samba_sockaddr);
674         const char *str;
675
676         if (!bsda) {
677                 errno = EINVAL;
678                 return NULL;
679         }
680
681         switch (bsda->u.sa.sa_family) {
682         case AF_UNIX:
683                 str = bsda->u.un.sun_path;
684                 break;
685         default:
686                 errno = EINVAL;
687                 return NULL;
688         }
689
690         return talloc_strdup(mem_ctx, str);
691 }
692
693 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
694                                         TALLOC_CTX *mem_ctx)
695 {
696         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
697                                            struct samba_sockaddr);
698         char *str;
699         char *addr_str;
700         const char *prefix = NULL;
701         uint16_t port;
702
703         switch (bsda->u.sa.sa_family) {
704         case AF_UNIX:
705                 return talloc_asprintf(mem_ctx, "unix:%s",
706                                        bsda->u.un.sun_path);
707         case AF_INET:
708                 prefix = "ipv4";
709                 break;
710 #ifdef HAVE_IPV6
711         case AF_INET6:
712                 prefix = "ipv6";
713                 break;
714 #endif
715         default:
716                 errno = EINVAL;
717                 return NULL;
718         }
719
720         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
721         if (!addr_str) {
722                 return NULL;
723         }
724
725         port = tsocket_address_inet_port(addr);
726
727         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
728                               prefix, addr_str, port);
729         talloc_free(addr_str);
730
731         return str;
732 }
733
734 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
735                                                          TALLOC_CTX *mem_ctx,
736                                                          const char *location)
737 {
738         struct samba_sockaddr *bsda = talloc_get_type(addr->private_data,
739                                            struct samba_sockaddr);
740         struct tsocket_address *copy;
741         int ret;
742
743         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
744                                                  &bsda->u.sa,
745                                                  bsda->sa_socklen,
746                                                  &copy,
747                                                  location);
748         if (ret != 0) {
749                 return NULL;
750         }
751
752         return copy;
753 }
754
755 static const struct tsocket_address_ops tsocket_address_bsd_ops = {
756         .name           = "bsd",
757         .string         = tsocket_address_bsd_string,
758         .copy           = tsocket_address_bsd_copy,
759 };
760
/*
 * Private state of a bsd datagram socket.
 */
struct tdgram_bsd {
	/* socket descriptor; -1 is reported as ENOTCONN by recvfrom_send */
	int fd;

	/* the tevent context "fde" was registered on (cached pointer) */
	void *event_ptr;
	/* fd event driving the readable/writeable handlers below */
	struct tevent_fd *fde;
	/* try to receive right away instead of waiting for readability */
	bool optimize_recvfrom;
	/* NOTE(review): presumably marks a netlink socket - confirm at users */
	bool netlink;

	/* callback (and its argument) invoked on TEVENT_FD_READ */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback (and its argument) invoked on TEVENT_FD_WRITE */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
774
775 bool tdgram_bsd_optimize_recvfrom(struct tdgram_context *dgram,
776                                   bool on)
777 {
778         struct tdgram_bsd *bsds =
779                 talloc_get_type(_tdgram_context_data(dgram),
780                 struct tdgram_bsd);
781         bool old;
782
783         if (bsds == NULL) {
784                 /* not a bsd socket */
785                 return false;
786         }
787
788         old = bsds->optimize_recvfrom;
789         bsds->optimize_recvfrom = on;
790
791         return old;
792 }
793
794 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
795                                    struct tevent_fd *fde,
796                                    uint16_t flags,
797                                    void *private_data)
798 {
799         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
800                                   struct tdgram_bsd);
801
802         if (flags & TEVENT_FD_WRITE) {
803                 bsds->writeable_handler(bsds->writeable_private);
804                 return;
805         }
806         if (flags & TEVENT_FD_READ) {
807                 if (!bsds->readable_handler) {
808                         TEVENT_FD_NOT_READABLE(bsds->fde);
809                         return;
810                 }
811                 bsds->readable_handler(bsds->readable_private);
812                 return;
813         }
814 }
815
816 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
817                                            struct tevent_context *ev,
818                                            void (*handler)(void *private_data),
819                                            void *private_data)
820 {
821         if (ev == NULL) {
822                 if (handler) {
823                         errno = EINVAL;
824                         return -1;
825                 }
826                 if (!bsds->readable_handler) {
827                         return 0;
828                 }
829                 bsds->readable_handler = NULL;
830                 bsds->readable_private = NULL;
831
832                 return 0;
833         }
834
835         /* read and write must use the same tevent_context */
836         if (bsds->event_ptr != ev) {
837                 if (bsds->readable_handler || bsds->writeable_handler) {
838                         errno = EINVAL;
839                         return -1;
840                 }
841                 bsds->event_ptr = NULL;
842                 TALLOC_FREE(bsds->fde);
843         }
844
845         if (tevent_fd_get_flags(bsds->fde) == 0) {
846                 TALLOC_FREE(bsds->fde);
847
848                 bsds->fde = tevent_add_fd(ev, bsds,
849                                           bsds->fd, TEVENT_FD_READ,
850                                           tdgram_bsd_fde_handler,
851                                           bsds);
852                 if (!bsds->fde) {
853                         errno = ENOMEM;
854                         return -1;
855                 }
856
857                 /* cache the event context we're running on */
858                 bsds->event_ptr = ev;
859         } else if (!bsds->readable_handler) {
860                 TEVENT_FD_READABLE(bsds->fde);
861         }
862
863         bsds->readable_handler = handler;
864         bsds->readable_private = private_data;
865
866         return 0;
867 }
868
869 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
870                                             struct tevent_context *ev,
871                                             void (*handler)(void *private_data),
872                                             void *private_data)
873 {
874         if (ev == NULL) {
875                 if (handler) {
876                         errno = EINVAL;
877                         return -1;
878                 }
879                 if (!bsds->writeable_handler) {
880                         return 0;
881                 }
882                 bsds->writeable_handler = NULL;
883                 bsds->writeable_private = NULL;
884                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
885
886                 return 0;
887         }
888
889         /* read and write must use the same tevent_context */
890         if (bsds->event_ptr != ev) {
891                 if (bsds->readable_handler || bsds->writeable_handler) {
892                         errno = EINVAL;
893                         return -1;
894                 }
895                 bsds->event_ptr = NULL;
896                 TALLOC_FREE(bsds->fde);
897         }
898
899         if (tevent_fd_get_flags(bsds->fde) == 0) {
900                 TALLOC_FREE(bsds->fde);
901
902                 bsds->fde = tevent_add_fd(ev, bsds,
903                                           bsds->fd, TEVENT_FD_WRITE,
904                                           tdgram_bsd_fde_handler,
905                                           bsds);
906                 if (!bsds->fde) {
907                         errno = ENOMEM;
908                         return -1;
909                 }
910
911                 /* cache the event context we're running on */
912                 bsds->event_ptr = ev;
913         } else if (!bsds->writeable_handler) {
914                 TEVENT_FD_WRITEABLE(bsds->fde);
915         }
916
917         bsds->writeable_handler = handler;
918         bsds->writeable_private = private_data;
919
920         return 0;
921 }
922
/*
 * Per-request state of tdgram_bsd_recvfrom_send().
 */
struct tdgram_bsd_recvfrom_state {
	/* the datagram socket the request operates on */
	struct tdgram_context *dgram;
	/* starts out true when the request is created */
	bool first_try;
	/* NOTE(review): presumably the received datagram - confirm in handler */
	uint8_t *buf;
	size_t len;
	/* NOTE(review): presumably the sender address - confirm in handler */
	struct tsocket_address *src;
};
930
931 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
932 {
933         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
934                                   struct tdgram_bsd);
935
936         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
937
938         return 0;
939 }
940
941 static void tdgram_bsd_recvfrom_handler(void *private_data);
942
943 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
944                                         struct tevent_context *ev,
945                                         struct tdgram_context *dgram)
946 {
947         struct tevent_req *req;
948         struct tdgram_bsd_recvfrom_state *state;
949         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
950         int ret;
951
952         req = tevent_req_create(mem_ctx, &state,
953                                 struct tdgram_bsd_recvfrom_state);
954         if (!req) {
955                 return NULL;
956         }
957
958         state->dgram    = dgram;
959         state->first_try= true;
960         state->buf      = NULL;
961         state->len      = 0;
962         state->src      = NULL;
963
964         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
965
966         if (bsds->fd == -1) {
967                 tevent_req_error(req, ENOTCONN);
968                 goto post;
969         }
970
971
972         /*
973          * this is a fast path, not waiting for the
974          * socket to become explicit readable gains
975          * about 10%-20% performance in benchmark tests.
976          */
977         if (bsds->optimize_recvfrom) {
978                 /*
979                  * We only do the optimization on
980                  * recvfrom if the caller asked for it.
981                  *
982                  * This is needed because in most cases
983                  * we prefer to flush send buffers before
984                  * receiving incoming requests.
985                  */
986                 tdgram_bsd_recvfrom_handler(req);
987                 if (!tevent_req_is_in_progress(req)) {
988                         goto post;
989                 }
990         }
991
992         ret = tdgram_bsd_set_readable_handler(bsds, ev,
993                                               tdgram_bsd_recvfrom_handler,
994                                               req);
995         if (ret == -1) {
996                 tevent_req_error(req, errno);
997                 goto post;
998         }
999
1000         return req;
1001
1002  post:
1003         tevent_req_post(req, ev);
1004         return req;
1005 }
1006
1007 static void tdgram_bsd_recvfrom_handler(void *private_data)
1008 {
1009         struct tevent_req *req = talloc_get_type_abort(private_data,
1010                                  struct tevent_req);
1011         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
1012                                         struct tdgram_bsd_recvfrom_state);
1013         struct tdgram_context *dgram = state->dgram;
1014         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1015         struct samba_sockaddr *bsda = NULL;
1016         ssize_t ret;
1017         int err;
1018         bool retry;
1019
1020         if (bsds->netlink) {
1021                 ret = tsocket_bsd_netlink_pending(bsds->fd);
1022         } else {
1023                 ret = tsocket_bsd_pending(bsds->fd);
1024         }
1025
1026         if (state->first_try && ret == 0) {
1027                 state->first_try = false;
1028                 /* retry later */
1029                 return;
1030         }
1031         state->first_try = false;
1032
1033         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1034         if (retry) {
1035                 /* retry later */
1036                 return;
1037         }
1038         if (tevent_req_error(req, err)) {
1039                 return;
1040         }
1041
1042         /* note that 'ret' can be 0 here */
1043         state->buf = talloc_array(state, uint8_t, ret);
1044         if (tevent_req_nomem(state->buf, req)) {
1045                 return;
1046         }
1047         state->len = ret;
1048
1049         state->src = tsocket_address_create(state,
1050                                             &tsocket_address_bsd_ops,
1051                                             &bsda,
1052                                             struct samba_sockaddr,
1053                                             __location__ "bsd_recvfrom");
1054         if (tevent_req_nomem(state->src, req)) {
1055                 return;
1056         }
1057
1058         ZERO_STRUCTP(bsda);
1059         bsda->sa_socklen = sizeof(bsda->u.ss);
1060 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
1061         bsda->u.sa.sa_len = bsda->sa_socklen;
1062 #endif
1063
1064         ret = recvfrom(bsds->fd, state->buf, state->len, 0,
1065                        &bsda->u.sa, &bsda->sa_socklen);
1066         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1067         if (retry) {
1068                 /* retry later */
1069                 return;
1070         }
1071         if (tevent_req_error(req, err)) {
1072                 return;
1073         }
1074
1075         /*
1076          * Some systems (FreeBSD, see bug #7115) return too much
1077          * bytes in tsocket_bsd_pending()/ioctl(fd, FIONREAD, ...),
1078          * the return value includes some IP/UDP header bytes,
1079          * while recvfrom() just returns the payload.
1080          */
1081         state->buf = talloc_realloc(state, state->buf, uint8_t, ret);
1082         if (tevent_req_nomem(state->buf, req)) {
1083                 return;
1084         }
1085         state->len = ret;
1086
1087         tevent_req_done(req);
1088 }
1089
1090 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
1091                                         int *perrno,
1092                                         TALLOC_CTX *mem_ctx,
1093                                         uint8_t **buf,
1094                                         struct tsocket_address **src)
1095 {
1096         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
1097                                         struct tdgram_bsd_recvfrom_state);
1098         ssize_t ret;
1099
1100         ret = tsocket_simple_int_recv(req, perrno);
1101         if (ret == 0) {
1102                 *buf = talloc_move(mem_ctx, &state->buf);
1103                 ret = state->len;
1104                 if (src) {
1105                         *src = talloc_move(mem_ctx, &state->src);
1106                 }
1107         }
1108
1109         tevent_req_received(req);
1110         return ret;
1111 }
1112
/*
 * State of one pending sendto request on a tdgram_bsd socket.
 */
1113 struct tdgram_bsd_sendto_state {
             /* datagram context the request was issued on */
1114         struct tdgram_context *dgram;
1115
             /*
              * Payload and destination exactly as passed by the caller;
              * only the pointers are stored, nothing is copied.
              * dst may be NULL, sendto() is then called without an address.
              */
1116         const uint8_t *buf;
1117         size_t len;
1118         const struct tsocket_address *dst;
1119
             /* return value of sendto(); -1 until the send succeeded */
1120         ssize_t ret;
1121 };
1122
1123 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
1124 {
1125         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
1126                                   struct tdgram_bsd);
1127
1128         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1129
1130         return 0;
1131 }
1132
1133 static void tdgram_bsd_sendto_handler(void *private_data);
1134
1135 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
1136                                                  struct tevent_context *ev,
1137                                                  struct tdgram_context *dgram,
1138                                                  const uint8_t *buf,
1139                                                  size_t len,
1140                                                  const struct tsocket_address *dst)
1141 {
1142         struct tevent_req *req;
1143         struct tdgram_bsd_sendto_state *state;
1144         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1145         int ret;
1146
1147         req = tevent_req_create(mem_ctx, &state,
1148                                 struct tdgram_bsd_sendto_state);
1149         if (!req) {
1150                 return NULL;
1151         }
1152
1153         state->dgram    = dgram;
1154         state->buf      = buf;
1155         state->len      = len;
1156         state->dst      = dst;
1157         state->ret      = -1;
1158
1159         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
1160
1161         if (bsds->fd == -1) {
1162                 tevent_req_error(req, ENOTCONN);
1163                 goto post;
1164         }
1165
1166         /*
1167          * this is a fast path, not waiting for the
1168          * socket to become explicit writeable gains
1169          * about 10%-20% performance in benchmark tests.
1170          */
1171         tdgram_bsd_sendto_handler(req);
1172         if (!tevent_req_is_in_progress(req)) {
1173                 goto post;
1174         }
1175
1176         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
1177                                                tdgram_bsd_sendto_handler,
1178                                                req);
1179         if (ret == -1) {
1180                 tevent_req_error(req, errno);
1181                 goto post;
1182         }
1183
1184         return req;
1185
1186  post:
1187         tevent_req_post(req, ev);
1188         return req;
1189 }
1190
1191 static void tdgram_bsd_sendto_handler(void *private_data)
1192 {
1193         struct tevent_req *req = talloc_get_type_abort(private_data,
1194                                  struct tevent_req);
1195         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1196                                         struct tdgram_bsd_sendto_state);
1197         struct tdgram_context *dgram = state->dgram;
1198         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1199         struct sockaddr *sa = NULL;
1200         socklen_t sa_socklen = 0;
1201         ssize_t ret;
1202         int err;
1203         bool retry;
1204
1205         if (state->dst) {
1206                 struct samba_sockaddr *bsda =
1207                         talloc_get_type(state->dst->private_data,
1208                         struct samba_sockaddr);
1209
1210                 sa = &bsda->u.sa;
1211                 sa_socklen = bsda->sa_socklen;
1212         }
1213
1214         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1215         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1216         if (retry) {
1217                 /* retry later */
1218                 return;
1219         }
1220
1221         if (err == EMSGSIZE) {
1222                 /* round up in 1K increments */
1223                 int bufsize = ((state->len + 1023) & (~1023));
1224
1225                 ret = setsockopt(bsds->fd, SOL_SOCKET, SO_SNDBUF, &bufsize,
1226                                  sizeof(bufsize));
1227                 if (ret == 0) {
1228                         /*
1229                          * We do the retry here, rather then via the
1230                          * handler, as we only want to retry once for
1231                          * this condition, so if there is a mismatch
1232                          * between what setsockopt() accepts and what can
1233                          * actually be sent, we do not end up in a
1234                          * loop.
1235                          */
1236
1237                         ret = sendto(bsds->fd, state->buf, state->len,
1238                                      0, sa, sa_socklen);
1239                         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1240                         if (retry) { /* retry later */
1241                                 return;
1242                         }
1243                 }
1244         }
1245
1246         if (tevent_req_error(req, err)) {
1247                 return;
1248         }
1249
1250         state->ret = ret;
1251
1252         tevent_req_done(req);
1253 }
1254
1255 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1256 {
1257         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1258                                         struct tdgram_bsd_sendto_state);
1259         ssize_t ret;
1260
1261         ret = tsocket_simple_int_recv(req, perrno);
1262         if (ret == 0) {
1263                 ret = state->ret;
1264         }
1265
1266         tevent_req_received(req);
1267         return ret;
1268 }
1269
/*
 * State of a disconnect request. The only member is a placeholder;
 * none of the disconnect functions read or write it.
 */
1270 struct tdgram_bsd_disconnect_state {
1271         uint8_t __dummy;
1272 };
1273
1274 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1275                                                      struct tevent_context *ev,
1276                                                      struct tdgram_context *dgram)
1277 {
1278         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1279         struct tevent_req *req;
1280         struct tdgram_bsd_disconnect_state *state;
1281         int ret;
1282         int err;
1283         bool dummy;
1284
1285         req = tevent_req_create(mem_ctx, &state,
1286                                 struct tdgram_bsd_disconnect_state);
1287         if (req == NULL) {
1288                 return NULL;
1289         }
1290
1291         if (bsds->fd == -1) {
1292                 tevent_req_error(req, ENOTCONN);
1293                 goto post;
1294         }
1295
1296         TALLOC_FREE(bsds->fde);
1297         ret = close(bsds->fd);
1298         bsds->fd = -1;
1299         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1300         if (tevent_req_error(req, err)) {
1301                 goto post;
1302         }
1303
1304         tevent_req_done(req);
1305 post:
1306         tevent_req_post(req, ev);
1307         return req;
1308 }
1309
/*
 * Collect the result of a disconnect request: returns whatever
 * tsocket_simple_int_recv() reports (it also gets 'perrno') and marks
 * the request as received.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int rc = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);

        return rc;
}
1320
/*
 * Operations table of the "bsd" tdgram backend. It is handed to
 * tdgram_context_create() by the socket constructors in this file and
 * wires the generic recvfrom/sendto/disconnect entry points to the
 * tdgram_bsd_* implementations.
 */
1321 static const struct tdgram_context_ops tdgram_bsd_ops = {
1322         .name                   = "bsd",
1323
1324         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1325         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1326
1327         .sendto_send            = tdgram_bsd_sendto_send,
1328         .sendto_recv            = tdgram_bsd_sendto_recv,
1329
1330         .disconnect_send        = tdgram_bsd_disconnect_send,
1331         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1332 };
1333
1334 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1335 {
1336         TALLOC_FREE(bsds->fde);
1337         if (bsds->fd != -1) {
1338                 close(bsds->fd);
1339                 bsds->fd = -1;
1340         }
1341         return 0;
1342 }
1343
/*
 * Create a SOCK_DGRAM socket and wrap it in a tdgram_context.
 *
 * local     - local address; its family must be AF_UNIX, AF_INET or
 *             (with HAVE_IPV6) AF_INET6
 * remote    - optional peer address; if given the socket is connect()ed
 * broadcast - set SO_BROADCAST on the socket (rejected for AF_UNIX)
 * mem_ctx   - talloc parent of the new context
 * _dgram    - receives the new context on success
 * location  - passed through to tdgram_context_create()
 *
 * Returns 0 on success, -1 with errno set on failure.
 */
1344 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1345                                    const struct tsocket_address *remote,
1346                                    bool broadcast,
1347                                    TALLOC_CTX *mem_ctx,
1348                                    struct tdgram_context **_dgram,
1349                                    const char *location)
1350 {
1351         struct samba_sockaddr *lbsda =
1352                 talloc_get_type_abort(local->private_data,
1353                 struct samba_sockaddr);
1354         struct samba_sockaddr *rbsda = NULL;
1355         struct tdgram_context *dgram;
1356         struct tdgram_bsd *bsds;
1357         int fd;
1358         int ret;
1359         bool do_bind = false;
1360         bool do_reuseaddr = false;
1361         bool do_ipv6only = false;
1362         bool is_inet = false;
1363         int sa_fam = lbsda->u.sa.sa_family;
1364
1365         if (remote) {
1366                 rbsda = talloc_get_type_abort(remote->private_data,
1367                         struct samba_sockaddr);
1368         }
1369
             /*
              * Decide from the local address whether we have to bind():
              * a non-empty unix path, a non-zero port or a specific
              * (non-wildcard) IP address all require it.
              */
1370         switch (lbsda->u.sa.sa_family) {
1371         case AF_UNIX:
1372                 if (broadcast) {
1373                         errno = EINVAL;
1374                         return -1;
1375                 }
1376                 if (lbsda->u.un.sun_path[0] != 0) {
1377                         do_reuseaddr = true;
1378                         do_bind = true;
1379                 }
1380                 break;
1381         case AF_INET:
1382                 if (lbsda->u.in.sin_port != 0) {
1383                         do_reuseaddr = true;
1384                         do_bind = true;
1385                 }
1386                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
1387                         do_bind = true;
1388                 }
1389                 is_inet = true;
1390                 break;
1391 #ifdef HAVE_IPV6
1392         case AF_INET6:
1393                 if (lbsda->u.in6.sin6_port != 0) {
1394                         do_reuseaddr = true;
1395                         do_bind = true;
1396                 }
1397                 if (memcmp(&in6addr_any,
1398                            &lbsda->u.in6.sin6_addr,
1399                            sizeof(in6addr_any)) != 0) {
1400                         do_bind = true;
1401                 }
1402                 is_inet = true;
1403                 do_ipv6only = true;
1404                 break;
1405 #endif
1406         default:
1407                 errno = EINVAL;
1408                 return -1;
1409         }
1410
             /*
              * Nothing to bind to on an inet socket: let the remote
              * address choose the socket family instead.
              */
1411         if (!do_bind && is_inet && rbsda) {
1412                 sa_fam = rbsda->u.sa.sa_family;
1413                 switch (sa_fam) {
1414                 case AF_INET:
1415                         do_ipv6only = false;
1416                         break;
1417 #ifdef HAVE_IPV6
1418                 case AF_INET6:
1419                         do_ipv6only = true;
1420                         break;
1421 #endif
1422                 }
1423         }
1424
1425         fd = socket(sa_fam, SOCK_DGRAM, 0);
1426         if (fd < 0) {
1427                 return -1;
1428         }
1429
             /*
              * NOTE(review): the fd is not closed here on failure, so
              * tsocket_bsd_common_prepare_fd() presumably closes it
              * itself - confirm against its definition.
              */
1430         fd = tsocket_bsd_common_prepare_fd(fd, true);
1431         if (fd < 0) {
1432                 return -1;
1433         }
1434
1435         dgram = tdgram_context_create(mem_ctx,
1436                                       &tdgram_bsd_ops,
1437                                       &bsds,
1438                                       struct tdgram_bsd,
1439                                       location);
1440         if (!dgram) {
1441                 int saved_errno = errno;
1442                 close(fd);
1443                 errno = saved_errno;
1444                 return -1;
1445         }
1446         ZERO_STRUCTP(bsds);
1447         bsds->fd = fd;
             /*
              * From here on the error paths only talloc_free(dgram);
              * presumably that also frees bsds, whose destructor
              * closes the fd.
              */
1448         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1449
1450 #ifdef HAVE_IPV6
1451         if (do_ipv6only) {
1452                 int val = 1;
1453
1454                 ret = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
1455                                  (const void *)&val, sizeof(val));
1456                 if (ret == -1) {
1457                         int saved_errno = errno;
1458                         talloc_free(dgram);
1459                         errno = saved_errno;
1460                         return -1;
1461                 }
1462         }
1463 #endif
1464
1465         if (broadcast) {
1466                 int val = 1;
1467
1468                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1469                                  (const void *)&val, sizeof(val));
1470                 if (ret == -1) {
1471                         int saved_errno = errno;
1472                         talloc_free(dgram);
1473                         errno = saved_errno;
1474                         return -1;
1475                 }
1476         }
1477
1478         if (do_reuseaddr) {
1479                 int val = 1;
1480
1481                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1482                                  (const void *)&val, sizeof(val));
1483                 if (ret == -1) {
1484                         int saved_errno = errno;
1485                         talloc_free(dgram);
1486                         errno = saved_errno;
1487                         return -1;
1488                 }
1489         }
1490
1491         if (do_bind) {
1492                 ret = bind(fd, &lbsda->u.sa, lbsda->sa_socklen);
1493                 if (ret == -1) {
1494                         int saved_errno = errno;
1495                         talloc_free(dgram);
1496                         errno = saved_errno;
1497                         return -1;
1498                 }
1499         }
1500
1501         if (rbsda) {
                     /* the peer must use the family the socket was created with */
1502                 if (rbsda->u.sa.sa_family != sa_fam) {
1503                         talloc_free(dgram);
1504                         errno = EINVAL;
1505                         return -1;
1506                 }
1507
1508                 ret = connect(fd, &rbsda->u.sa, rbsda->sa_socklen);
1509                 if (ret == -1) {
1510                         int saved_errno = errno;
1511                         talloc_free(dgram);
1512                         errno = saved_errno;
1513                         return -1;
1514                 }
1515         }
1516
1517         *_dgram = dgram;
1518         return 0;
1519 }
1520
1521 int _tdgram_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1522                                 int fd,
1523                                 struct tdgram_context **_dgram,
1524                                 const char *location)
1525 {
1526         struct tdgram_context *dgram;
1527         struct tdgram_bsd *bsds;
1528 #ifdef HAVE_LINUX_RTNETLINK_H
1529         int result;
1530         struct sockaddr sa;
1531         socklen_t sa_len = sizeof(struct sockaddr);
1532 #endif
1533
1534         dgram = tdgram_context_create(mem_ctx,
1535                                       &tdgram_bsd_ops,
1536                                       &bsds,
1537                                       struct tdgram_bsd,
1538                                       location);
1539         if (!dgram) {
1540                 return -1;
1541         }
1542         ZERO_STRUCTP(bsds);
1543         bsds->fd = fd;
1544         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1545
1546         *_dgram = dgram;
1547
1548 #ifdef HAVE_LINUX_RTNETLINK_H
1549         /*
1550          * Try to determine the protocol family and remember if it's
1551          * AF_NETLINK. We don't care if this fails.
1552          */
1553         result = getsockname(fd, &sa, &sa_len);
1554         if (result == 0 && sa.sa_family == AF_NETLINK) {
1555                 bsds->netlink = true;
1556         }
1557 #endif
1558
1559         return 0;
1560 }
1561
1562 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1563                             const struct tsocket_address *remote,
1564                             TALLOC_CTX *mem_ctx,
1565                             struct tdgram_context **dgram,
1566                             const char *location)
1567 {
1568         struct samba_sockaddr *lbsda =
1569                 talloc_get_type_abort(local->private_data,
1570                 struct samba_sockaddr);
1571         int ret;
1572
1573         switch (lbsda->u.sa.sa_family) {
1574         case AF_INET:
1575                 break;
1576 #ifdef HAVE_IPV6
1577         case AF_INET6:
1578                 break;
1579 #endif
1580         default:
1581                 errno = EINVAL;
1582                 return -1;
1583         }
1584
1585         ret = tdgram_bsd_dgram_socket(local, remote, false,
1586                                       mem_ctx, dgram, location);
1587
1588         return ret;
1589 }
1590
1591 int _tdgram_inet_udp_broadcast_socket(const struct tsocket_address *local,
1592                                       TALLOC_CTX *mem_ctx,
1593                                       struct tdgram_context **dgram,
1594                                       const char *location)
1595 {
1596         struct samba_sockaddr *lbsda =
1597                 talloc_get_type_abort(local->private_data,
1598                 struct samba_sockaddr);
1599         int ret;
1600
1601         switch (lbsda->u.sa.sa_family) {
1602         case AF_INET:
1603                 break;
1604 #ifdef HAVE_IPV6
1605         case AF_INET6:
1606                 /* only ipv4 */
1607                 errno = EINVAL;
1608                 return -1;
1609 #endif
1610         default:
1611                 errno = EINVAL;
1612                 return -1;
1613         }
1614
1615         ret = tdgram_bsd_dgram_socket(local, NULL, true,
1616                                       mem_ctx, dgram, location);
1617
1618         return ret;
1619 }
1620
1621 int _tdgram_unix_socket(const struct tsocket_address *local,
1622                         const struct tsocket_address *remote,
1623                         TALLOC_CTX *mem_ctx,
1624                         struct tdgram_context **dgram,
1625                         const char *location)
1626 {
1627         struct samba_sockaddr *lbsda =
1628                 talloc_get_type_abort(local->private_data,
1629                 struct samba_sockaddr);
1630         int ret;
1631
1632         switch (lbsda->u.sa.sa_family) {
1633         case AF_UNIX:
1634                 break;
1635         default:
1636                 errno = EINVAL;
1637                 return -1;
1638         }
1639
1640         ret = tdgram_bsd_dgram_socket(local, remote, false,
1641                                       mem_ctx, dgram, location);
1642
1643         return ret;
1644 }
1645
/*
 * Private data of a "bsd" stream socket context.
 */
1646 struct tstream_bsd {
             /* the socket file descriptor */
1647         int fd;
             /*
              * errno value recorded by tstream_bsd_fde_handler() the
              * first time it handles TEVENT_FD_ERROR; 0 until then
              */
1648         int error;
1649
             /* event context that 'fde' was added to (cached for comparison) */
1650         void *event_ptr;
1651         struct tevent_fd *fde;
             /* toggled by tstream_bsd_optimize_readv() */
1652         bool optimize_readv;
             /*
              * toggled by tstream_bsd_fail_readv_first_error(); when set,
              * the fde handler does not let the readable handler drain
              * remaining data once an error was signalled
              */
1653         bool fail_readv_first_error;
1654
             /* callbacks, with their arguments, invoked by the fde handler */
1655         void *readable_private;
1656         void (*readable_handler)(void *private_data);
1657         void *writeable_private;
1658         void (*writeable_handler)(void *private_data);
1659 };
1660
1661 bool tstream_bsd_optimize_readv(struct tstream_context *stream,
1662                                 bool on)
1663 {
1664         struct tstream_bsd *bsds =
1665                 talloc_get_type(_tstream_context_data(stream),
1666                 struct tstream_bsd);
1667         bool old;
1668
1669         if (bsds == NULL) {
1670                 /* not a bsd socket */
1671                 return false;
1672         }
1673
1674         old = bsds->optimize_readv;
1675         bsds->optimize_readv = on;
1676
1677         return old;
1678 }
1679
1680 bool tstream_bsd_fail_readv_first_error(struct tstream_context *stream,
1681                                         bool on)
1682 {
1683         struct tstream_bsd *bsds =
1684                 talloc_get_type(_tstream_context_data(stream),
1685                 struct tstream_bsd);
1686         bool old;
1687
1688         if (bsds == NULL) {
1689                 /* not a bsd socket */
1690                 return false;
1691         }
1692
1693         old = bsds->fail_readv_first_error;
1694         bsds->fail_readv_first_error = on;
1695
1696         return old;
1697 }
1698
/*
 * tevent fd callback of a bsd stream socket.
 *
 * private_data is the struct tstream_bsd. Depending on 'flags' the
 * readable or the writeable handler is called; at most one handler is
 * called per invocation. On TEVENT_FD_ERROR the error is recorded in
 * bsds->error first, unless remaining data may still be drained.
 */
1699 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1700                                     struct tevent_fd *fde,
1701                                     uint16_t flags,
1702                                     void *private_data)
1703 {
1704         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1705                                    struct tstream_bsd);
1706
1707         if (flags & TEVENT_FD_ERROR) {
1708                 /*
1709                  * We lazily keep TEVENT_FD_READ alive
1710                  * in tstream_bsd_set_readable_handler()
1711                  *
1712                  * So we have to check TEVENT_FD_READ
1713                  * as well as bsds->readable_handler
1714                  *
1715                  * We only drain remaining data from the
1716                  * recv queue if available and desired.
1717                  */
1718                 if ((flags & TEVENT_FD_READ) &&
1719                     !bsds->fail_readv_first_error &&
1720                     (bsds->readable_handler != NULL))
1721                 {
1722                         /*
1723                          * If there's still data to read
1724                          * we allow it to be read until
1725                          * we reach EOF (=> EPIPE).
1726                          */
1727                         bsds->readable_handler(bsds->readable_private);
1728                         return;
1729                 }
1730
1731                 /*
1732                  * If there's no data left to read,
1733                  * we get the error.
1734                  *
1735                  * It means we no longer call any readv or
1736                  * writev, as bsds->error is checked first.
1737                  */
1738                 if (bsds->error == 0) {
1739                         int ret = samba_socket_poll_or_sock_error(bsds->fd);
1740
1741                         if (ret == -1) {
1742                                 bsds->error = errno;
1743                         }
1744                         /* fallback to EPIPE */
1745                         if (bsds->error == 0) {
1746                                 bsds->error = EPIPE;
1747                         }
1748                 }
1749
1750                 /*
1751                  * Let writes fail early.
1752                  *
1753                  * Note we only need to check TEVENT_FD_WRITE
1754                  * as tstream_bsd_set_writeable_handler()
1755                  * clears it together with the handler.
1756                  */
1757                 if (flags & TEVENT_FD_WRITE) {
1758                         bsds->writeable_handler(bsds->writeable_private);
1759                         return;
1760                 }
1761
1762                 /* We prefer the readable handler to fire first. */
1763                 if (bsds->readable_handler != NULL) {
1764                         bsds->readable_handler(bsds->readable_private);
1765                         return;
1766                 }
1767
1768                 /* As last resort we notify the writeable handler */
1769                 if (bsds->writeable_handler != NULL) {
1770                         bsds->writeable_handler(bsds->writeable_private);
1771                         return;
1772                 }
1773
1774                 /*
1775                  * We may hit this because we don't clear TEVENT_FD_ERROR
1776                  * in tstream_bsd_set_readable_handler() nor
1777                  * tstream_bsd_set_writeable_handler().
1778                  *
1779                  * As we already captured the error, we can remove
1780                  * the fde completely.
1781                  */
1782                 TALLOC_FREE(bsds->fde);
1783                 return;
1784         }
             /* no error: a pending write is served before a pending read */
1785         if (flags & TEVENT_FD_WRITE) {
1786                 bsds->writeable_handler(bsds->writeable_private);
1787                 return;
1788         }
1789         if (flags & TEVENT_FD_READ) {
1790                 if (!bsds->readable_handler) {
1791                         /*
1792                          * tstream_bsd_set_readable_handler
1793                          * doesn't clear TEVENT_FD_READ.
1794                          *
1795                          * In order to avoid cpu-spinning
1796                          * we need to clear it here.
1797                          */
1798                         TEVENT_FD_NOT_READABLE(bsds->fde);
1799
1800                         /*
1801                          * Here we're lazy and keep TEVENT_FD_ERROR
1802                          * alive. If it's triggered the next time
1803                          * we'll handle it gracefully above
1804                          * and end up with TALLOC_FREE(bsds->fde);
1805                          * in order to not spin on TEVENT_FD_ERROR.
1806                          */
1807                         return;
1808                 }
1809                 bsds->readable_handler(bsds->readable_private);
1810                 return;
1811         }
1812 }
1813
1814 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1815                                             struct tevent_context *ev,
1816                                             void (*handler)(void *private_data),
1817                                             void *private_data)
1818 {
1819         if (ev == NULL) {
1820                 if (handler) {
1821                         errno = EINVAL;
1822                         return -1;
1823                 }
1824                 if (!bsds->readable_handler) {
1825                         return 0;
1826                 }
1827                 bsds->readable_handler = NULL;
1828                 bsds->readable_private = NULL;
1829
1830                 /*
1831                  * Here we are lazy as it's very likely that the next
1832                  * tevent_readv_send() will come in shortly,
1833                  * so we keep TEVENT_FD_READ alive.
1834                  */
1835                 return 0;
1836         }
1837
1838         /* read and write must use the same tevent_context */
1839         if (bsds->event_ptr != ev) {
1840                 if (bsds->readable_handler || bsds->writeable_handler) {
1841                         errno = EINVAL;
1842                         return -1;
1843                 }
1844                 bsds->event_ptr = NULL;
1845                 TALLOC_FREE(bsds->fde);
1846         }
1847
1848         if (tevent_fd_get_flags(bsds->fde) == 0) {
1849                 TALLOC_FREE(bsds->fde);
1850
1851                 bsds->fde = tevent_add_fd(ev, bsds,
1852                                           bsds->fd,
1853                                           TEVENT_FD_ERROR | TEVENT_FD_READ,
1854                                           tstream_bsd_fde_handler,
1855                                           bsds);
1856                 if (!bsds->fde) {
1857                         errno = ENOMEM;
1858                         return -1;
1859                 }
1860
1861                 /* cache the event context we're running on */
1862                 bsds->event_ptr = ev;
1863         } else if (!bsds->readable_handler) {
1864                 TEVENT_FD_READABLE(bsds->fde);
1865                 /*
1866                  * TEVENT_FD_ERROR is likely already set, so
1867                  * TEVENT_FD_WANTERROR() is most likely a no-op.
1868                  */
1869                 TEVENT_FD_WANTERROR(bsds->fde);
1870         }
1871
1872         bsds->readable_handler = handler;
1873         bsds->readable_private = private_data;
1874
1875         return 0;
1876 }
1877
1878 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1879                                              struct tevent_context *ev,
1880                                              void (*handler)(void *private_data),
1881                                              void *private_data)
1882 {
1883         if (ev == NULL) {
1884                 if (handler) {
1885                         errno = EINVAL;
1886                         return -1;
1887                 }
1888                 if (!bsds->writeable_handler) {
1889                         return 0;
1890                 }
1891                 bsds->writeable_handler = NULL;
1892                 bsds->writeable_private = NULL;
1893
1894                 /*
1895                  * The writeable handler is only
1896                  * set if we got EAGAIN or a short
1897                  * writev on the first try, so
1898                  * this isn't the hot path.
1899                  *
1900                  * Here we are lazy and leave TEVENT_FD_ERROR
1901                  * alive as it's shared with the readable
1902                  * handler. So we only clear TEVENT_FD_WRITE.
1903                  */
1904                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1905                 return 0;
1906         }
1907
1908         /* read and write must use the same tevent_context */
1909         if (bsds->event_ptr != ev) {
1910                 if (bsds->readable_handler || bsds->writeable_handler) {
1911                         errno = EINVAL;
1912                         return -1;
1913                 }
1914                 bsds->event_ptr = NULL;
1915                 TALLOC_FREE(bsds->fde);
1916         }
1917
1918         if (tevent_fd_get_flags(bsds->fde) == 0) {
1919                 TALLOC_FREE(bsds->fde);
1920
1921                 bsds->fde = tevent_add_fd(ev, bsds,
1922                                           bsds->fd,
1923                                           TEVENT_FD_ERROR | TEVENT_FD_WRITE,
1924                                           tstream_bsd_fde_handler,
1925                                           bsds);
1926                 if (!bsds->fde) {
1927                         errno = ENOMEM;
1928                         return -1;
1929                 }
1930
1931                 /* cache the event context we're running on */
1932                 bsds->event_ptr = ev;
1933         } else if (!bsds->writeable_handler) {
1934                 TEVENT_FD_WRITEABLE(bsds->fde);
1935                 /*
1936                  * TEVENT_FD_ERROR is likely already set, so
1937                  * TEVENT_FD_WANTERROR() is most likely a no-op.
1938                  */
1939                 TEVENT_FD_WANTERROR(bsds->fde);
1940         }
1941
1942         bsds->writeable_handler = handler;
1943         bsds->writeable_private = private_data;
1944
1945         return 0;
1946 }
1947
1948 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1949 {
1950         struct tstream_bsd *bsds = tstream_context_data(stream,
1951                                    struct tstream_bsd);
1952         ssize_t ret;
1953
1954         if (bsds->fd == -1) {
1955                 errno = ENOTCONN;
1956                 return -1;
1957         }
1958
1959         if (bsds->error != 0) {
1960                 errno = bsds->error;
1961                 return -1;
1962         }
1963
1964         ret = tsocket_bsd_pending(bsds->fd);
1965         if (ret == -1) {
1966                 /*
1967                  * remember the error and don't
1968                  * allow further requests
1969                  */
1970                 bsds->error = errno;
1971         }
1972
1973         return ret;
1974 }
1975
/* Per-request state of tstream_bsd_readv_send()/_recv(). */
struct tstream_bsd_readv_state {
        /* the stream the request operates on */
        struct tstream_context *stream;

        /*
         * Private copy of the caller's iovec array and the number of
         * entries left in it; both are updated via iov_advance() as
         * data arrives.
         */
        struct iovec *vector;
        size_t count;

        /* bytes read so far; handed back by tstream_bsd_readv_recv() */
        int ret;
};
1984
1985 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1986 {
1987         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1988                                    struct tstream_bsd);
1989
1990         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1991
1992         return 0;
1993 }
1994
1995 static void tstream_bsd_readv_handler(void *private_data);
1996
1997 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1998                                         struct tevent_context *ev,
1999                                         struct tstream_context *stream,
2000                                         struct iovec *vector,
2001                                         size_t count)
2002 {
2003         struct tevent_req *req;
2004         struct tstream_bsd_readv_state *state;
2005         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
2006         int ret;
2007
2008         req = tevent_req_create(mem_ctx, &state,
2009                                 struct tstream_bsd_readv_state);
2010         if (!req) {
2011                 return NULL;
2012         }
2013
2014         state->stream   = stream;
2015         /* we make a copy of the vector so that we can modify it */
2016         state->vector   = talloc_array(state, struct iovec, count);
2017         if (tevent_req_nomem(state->vector, req)) {
2018                 goto post;
2019         }
2020         memcpy(state->vector, vector, sizeof(struct iovec)*count);
2021         state->count    = count;
2022         state->ret      = 0;
2023
2024         talloc_set_destructor(state, tstream_bsd_readv_destructor);
2025
2026         if (bsds->fd == -1) {
2027                 tevent_req_error(req, ENOTCONN);
2028                 goto post;
2029         }
2030
2031         /*
2032          * this is a fast path, not waiting for the
2033          * socket to become explicit readable gains
2034          * about 10%-20% performance in benchmark tests.
2035          */
2036         if (bsds->optimize_readv) {
2037                 /*
2038                  * We only do the optimization on
2039                  * readv if the caller asked for it.
2040                  *
2041                  * This is needed because in most cases
2042                  * we prefer to flush send buffers before
2043                  * receiving incoming requests.
2044                  */
2045                 tstream_bsd_readv_handler(req);
2046                 if (!tevent_req_is_in_progress(req)) {
2047                         goto post;
2048                 }
2049         }
2050
2051         ret = tstream_bsd_set_readable_handler(bsds, ev,
2052                                               tstream_bsd_readv_handler,
2053                                               req);
2054         if (ret == -1) {
2055                 tevent_req_error(req, errno);
2056                 goto post;
2057         }
2058
2059         return req;
2060
2061  post:
2062         tevent_req_post(req, ev);
2063         return req;
2064 }
2065
2066 static void tstream_bsd_readv_handler(void *private_data)
2067 {
2068         struct tevent_req *req = talloc_get_type_abort(private_data,
2069                                  struct tevent_req);
2070         struct tstream_bsd_readv_state *state = tevent_req_data(req,
2071                                         struct tstream_bsd_readv_state);
2072         struct tstream_context *stream = state->stream;
2073         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
2074         int ret;
2075         int err;
2076         int _count;
2077         bool ok, retry;
2078
2079         if (bsds->error != 0) {
2080                 tevent_req_error(req, bsds->error);
2081                 return;
2082         }
2083
2084         ret = readv(bsds->fd, state->vector, state->count);
2085         if (ret == 0) {
2086                 /* propagate end of file */
2087                 bsds->error = EPIPE;
2088                 tevent_req_error(req, EPIPE);
2089                 return;
2090         }
2091         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2092         if (retry) {
2093                 /* retry later */
2094                 return;
2095         }
2096         if (err != 0) {
2097                 /*
2098                  * remember the error and don't
2099                  * allow further requests
2100                  */
2101                 bsds->error = err;
2102         }
2103         if (tevent_req_error(req, err)) {
2104                 return;
2105         }
2106
2107         state->ret += ret;
2108
2109         _count = state->count; /* tstream has size_t count, readv has int */
2110         ok = iov_advance(&state->vector, &_count, ret);
2111         state->count = _count;
2112
2113         if (!ok) {
2114                 tevent_req_error(req, EINVAL);
2115                 return;
2116         }
2117
2118         if (state->count > 0) {
2119                 /* we have more to read */
2120                 return;
2121         }
2122
2123         tevent_req_done(req);
2124 }
2125
2126 static int tstream_bsd_readv_recv(struct tevent_req *req,
2127                                   int *perrno)
2128 {
2129         struct tstream_bsd_readv_state *state = tevent_req_data(req,
2130                                         struct tstream_bsd_readv_state);
2131         int ret;
2132
2133         ret = tsocket_simple_int_recv(req, perrno);
2134         if (ret == 0) {
2135                 ret = state->ret;
2136         }
2137
2138         tevent_req_received(req);
2139         return ret;
2140 }
2141
/* Per-request state of tstream_bsd_writev_send()/_recv(). */
struct tstream_bsd_writev_state {
        /* the stream the request operates on */
        struct tstream_context *stream;

        /*
         * Private copy of the caller's iovec array and the number of
         * entries left in it; both are updated via iov_advance() as
         * data is written.
         */
        struct iovec *vector;
        size_t count;

        /* bytes written so far; handed back by tstream_bsd_writev_recv() */
        int ret;
};
2150
2151 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
2152 {
2153         struct tstream_bsd *bsds = tstream_context_data(state->stream,
2154                                   struct tstream_bsd);
2155
2156         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
2157
2158         return 0;
2159 }
2160
2161 static void tstream_bsd_writev_handler(void *private_data);
2162
2163 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
2164                                                  struct tevent_context *ev,
2165                                                  struct tstream_context *stream,
2166                                                  const struct iovec *vector,
2167                                                  size_t count)
2168 {
2169         struct tevent_req *req;
2170         struct tstream_bsd_writev_state *state;
2171         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
2172         int ret;
2173
2174         req = tevent_req_create(mem_ctx, &state,
2175                                 struct tstream_bsd_writev_state);
2176         if (!req) {
2177                 return NULL;
2178         }
2179
2180         state->stream   = stream;
2181         /* we make a copy of the vector so that we can modify it */
2182         state->vector   = talloc_array(state, struct iovec, count);
2183         if (tevent_req_nomem(state->vector, req)) {
2184                 goto post;
2185         }
2186         memcpy(state->vector, vector, sizeof(struct iovec)*count);
2187         state->count    = count;
2188         state->ret      = 0;
2189
2190         talloc_set_destructor(state, tstream_bsd_writev_destructor);
2191
2192         if (bsds->fd == -1) {
2193                 tevent_req_error(req, ENOTCONN);
2194                 goto post;
2195         }
2196
2197         /*
2198          * this is a fast path, not waiting for the
2199          * socket to become explicit writeable gains
2200          * about 10%-20% performance in benchmark tests.
2201          */
2202         tstream_bsd_writev_handler(req);
2203         if (!tevent_req_is_in_progress(req)) {
2204                 goto post;
2205         }
2206
2207         ret = tstream_bsd_set_writeable_handler(bsds, ev,
2208                                                tstream_bsd_writev_handler,
2209                                                req);
2210         if (ret == -1) {
2211                 tevent_req_error(req, errno);
2212                 goto post;
2213         }
2214
2215         return req;
2216
2217  post:
2218         tevent_req_post(req, ev);
2219         return req;
2220 }
2221
2222 static void tstream_bsd_writev_handler(void *private_data)
2223 {
2224         struct tevent_req *req = talloc_get_type_abort(private_data,
2225                                  struct tevent_req);
2226         struct tstream_bsd_writev_state *state = tevent_req_data(req,
2227                                         struct tstream_bsd_writev_state);
2228         struct tstream_context *stream = state->stream;
2229         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
2230         ssize_t ret;
2231         int err;
2232         int _count;
2233         bool ok, retry;
2234
2235         if (bsds->error != 0) {
2236                 tevent_req_error(req, bsds->error);
2237                 return;
2238         }
2239
2240         ret = writev(bsds->fd, state->vector, state->count);
2241         if (ret == 0) {
2242                 /* propagate end of file */
2243                 bsds->error = EPIPE;
2244                 tevent_req_error(req, EPIPE);
2245                 return;
2246         }
2247         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2248         if (retry) {
2249                 /*
2250                  * retry later...
2251                  */
2252                 return;
2253         }
2254         if (err != 0) {
2255                 /*
2256                  * remember the error and don't
2257                  * allow further requests
2258                  */
2259                 bsds->error = err;
2260         }
2261         if (tevent_req_error(req, err)) {
2262                 return;
2263         }
2264
2265         state->ret += ret;
2266
2267         _count = state->count; /* tstream has size_t count, writev has int */
2268         ok = iov_advance(&state->vector, &_count, ret);
2269         state->count = _count;
2270
2271         if (!ok) {
2272                 tevent_req_error(req, EINVAL);
2273                 return;
2274         }
2275
2276         if (state->count > 0) {
2277                 /*
2278                  * we have more to write
2279                  */
2280                 return;
2281         }
2282
2283         tevent_req_done(req);
2284 }
2285
2286 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
2287 {
2288         struct tstream_bsd_writev_state *state = tevent_req_data(req,
2289                                         struct tstream_bsd_writev_state);
2290         int ret;
2291
2292         ret = tsocket_simple_int_recv(req, perrno);
2293         if (ret == 0) {
2294                 ret = state->ret;
2295         }
2296
2297         tevent_req_received(req);
2298         return ret;
2299 }
2300
/*
 * State type of the disconnect request. tstream_bsd_disconnect_send()
 * stores nothing in it; the single member is a placeholder.
 */
struct tstream_bsd_disconnect_state {
        void *__dummy;
};
2304
2305 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
2306                                                      struct tevent_context *ev,
2307                                                      struct tstream_context *stream)
2308 {
2309         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
2310         struct tevent_req *req;
2311         struct tstream_bsd_disconnect_state *state;
2312         int ret;
2313         int err;
2314         bool dummy;
2315
2316         req = tevent_req_create(mem_ctx, &state,
2317                                 struct tstream_bsd_disconnect_state);
2318         if (req == NULL) {
2319                 return NULL;
2320         }
2321
2322         if (bsds->fd == -1) {
2323                 tevent_req_error(req, ENOTCONN);
2324                 goto post;
2325         }
2326
2327         TALLOC_FREE(bsds->fde);
2328         ret = close(bsds->fd);
2329         bsds->fd = -1;
2330         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
2331         if (tevent_req_error(req, err)) {
2332                 goto post;
2333         }
2334
2335         tevent_req_done(req);
2336 post:
2337         tevent_req_post(req, ev);
2338         return req;
2339 }
2340
/*
 * Collect the result of a disconnect request: returns whatever
 * tsocket_simple_int_recv() reports, with *perrno filled in by it.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);

        return result;
}
2351
2352 static const struct tstream_context_ops tstream_bsd_ops = {
2353         .name                   = "bsd",
2354
2355         .pending_bytes          = tstream_bsd_pending_bytes,
2356
2357         .readv_send             = tstream_bsd_readv_send,
2358         .readv_recv             = tstream_bsd_readv_recv,
2359
2360         .writev_send            = tstream_bsd_writev_send,
2361         .writev_recv            = tstream_bsd_writev_recv,
2362
2363         .disconnect_send        = tstream_bsd_disconnect_send,
2364         .disconnect_recv        = tstream_bsd_disconnect_recv,
2365 };
2366
2367 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
2368 {
2369         TALLOC_FREE(bsds->fde);
2370         if (bsds->fd != -1) {
2371                 close(bsds->fd);
2372                 bsds->fd = -1;
2373         }
2374         return 0;
2375 }
2376
2377 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
2378                                  int fd,
2379                                  struct tstream_context **_stream,
2380                                  const char *location)
2381 {
2382         struct tstream_context *stream;
2383         struct tstream_bsd *bsds;
2384
2385         stream = tstream_context_create(mem_ctx,
2386                                         &tstream_bsd_ops,
2387                                         &bsds,
2388                                         struct tstream_bsd,
2389                                         location);
2390         if (!stream) {
2391                 return -1;
2392         }
2393         ZERO_STRUCTP(bsds);
2394         bsds->fd = fd;
2395         talloc_set_destructor(bsds, tstream_bsd_destructor);
2396
2397         *_stream = stream;
2398         return 0;
2399 }
2400
/* Per-request state of tstream_bsd_connect_send()/_recv(). */
struct tstream_bsd_connect_state {
        /*
         * The socket being connected, or -1. It is closed by
         * tstream_bsd_connect_destructor() unless ownership was
         * handed to the new stream in tstream_bsd_connect_recv().
         */
        int fd;
        /* fd event used while a non-blocking connect is in progress */
        struct tevent_fd *fde;
        /*
         * Not referenced by the connect code in this file. The type
         * used to be misspelled "tstream_conext", which declared an
         * unrelated incomplete struct type.
         */
        struct tstream_context *stream;
        /*
         * For inet sockets: address object that getsockname() fills
         * with the local address after a successful connect.
         */
        struct tsocket_address *local;
};
2407
2408 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
2409 {
2410         TALLOC_FREE(state->fde);
2411         if (state->fd != -1) {
2412                 close(state->fd);
2413                 state->fd = -1;
2414         }
2415
2416         return 0;
2417 }
2418
2419 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2420                                             struct tevent_fd *fde,
2421                                             uint16_t flags,
2422                                             void *private_data);
2423
2424 static struct tevent_req *tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
2425                                         struct tevent_context *ev,
2426                                         int sys_errno,
2427                                         const struct tsocket_address *local,
2428                                         const struct tsocket_address *remote)
2429 {
2430         struct tevent_req *req;
2431         struct tstream_bsd_connect_state *state;
2432         struct samba_sockaddr *lbsda =
2433                 talloc_get_type_abort(local->private_data,
2434                 struct samba_sockaddr);
2435         struct samba_sockaddr *lrbsda = NULL;
2436         struct samba_sockaddr *rbsda =
2437                 talloc_get_type_abort(remote->private_data,
2438                 struct samba_sockaddr);
2439         int ret;
2440         bool do_bind = false;
2441         bool do_reuseaddr = false;
2442         bool do_ipv6only = false;
2443         bool is_inet = false;
2444         int sa_fam = lbsda->u.sa.sa_family;
2445
2446         req = tevent_req_create(mem_ctx, &state,
2447                                 struct tstream_bsd_connect_state);
2448         if (!req) {
2449                 return NULL;
2450         }
2451         state->fd = -1;
2452         state->fde = NULL;
2453
2454         talloc_set_destructor(state, tstream_bsd_connect_destructor);
2455
2456         /* give the wrappers a chance to report an error */
2457         if (sys_errno != 0) {
2458                 tevent_req_error(req, sys_errno);
2459                 goto post;
2460         }
2461
2462         switch (lbsda->u.sa.sa_family) {
2463         case AF_UNIX:
2464                 if (lbsda->u.un.sun_path[0] != 0) {
2465                         do_reuseaddr = true;
2466                         do_bind = true;
2467                 }
2468                 break;
2469         case AF_INET:
2470                 if (lbsda->u.in.sin_port != 0) {
2471                         do_reuseaddr = true;
2472                         do_bind = true;
2473                 }
2474                 if (lbsda->u.in.sin_addr.s_addr != INADDR_ANY) {
2475                         do_bind = true;
2476                 }
2477                 is_inet = true;
2478                 break;
2479 #ifdef HAVE_IPV6
2480         case AF_INET6:
2481                 if (lbsda->u.in6.sin6_port != 0) {
2482                         do_reuseaddr = true;
2483                         do_bind = true;
2484                 }
2485                 if (memcmp(&in6addr_any,
2486                            &lbsda->u.in6.sin6_addr,
2487                            sizeof(in6addr_any)) != 0) {
2488                         do_bind = true;
2489                 }
2490                 is_inet = true;
2491                 do_ipv6only = true;
2492                 break;
2493 #endif
2494         default:
2495                 tevent_req_error(req, EINVAL);
2496                 goto post;
2497         }
2498
2499         if (!do_bind && is_inet) {
2500                 sa_fam = rbsda->u.sa.sa_family;
2501                 switch (sa_fam) {
2502                 case AF_INET:
2503                         do_ipv6only = false;
2504                         break;
2505 #ifdef HAVE_IPV6
2506                 case AF_INET6:
2507                         do_ipv6only = true;
2508                         break;
2509 #endif
2510                 }
2511         }
2512
2513         if (is_inet) {
2514                 state->local = tsocket_address_create(state,
2515                                                       &tsocket_address_bsd_ops,
2516                                                       &lrbsda,
2517                                                       struct samba_sockaddr,
2518                                                       __location__ "bsd_connect");
2519                 if (tevent_req_nomem(state->local, req)) {
2520                         goto post;
2521                 }
2522
2523                 ZERO_STRUCTP(lrbsda);
2524                 lrbsda->sa_socklen = sizeof(lrbsda->u.ss);
2525 #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
2526                 lrbsda->u.sa.sa_len = lrbsda->sa_socklen;
2527 #endif
2528         }
2529
2530         state->fd = socket(sa_fam, SOCK_STREAM, 0);
2531         if (state->fd == -1) {
2532                 tevent_req_error(req, errno);
2533                 goto post;
2534         }
2535
2536         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2537         if (state->fd == -1) {
2538                 tevent_req_error(req, errno);
2539                 goto post;
2540         }
2541
2542 #ifdef HAVE_IPV6
2543         if (do_ipv6only) {
2544                 int val = 1;
2545
2546                 ret = setsockopt(state->fd, IPPROTO_IPV6, IPV6_V6ONLY,
2547                                  (const void *)&val, sizeof(val));
2548                 if (ret == -1) {
2549                         tevent_req_error(req, errno);
2550                         goto post;
2551                 }
2552         }
2553 #endif
2554
2555         if (do_reuseaddr) {
2556                 int val = 1;
2557
2558                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2559                                  (const void *)&val, sizeof(val));
2560                 if (ret == -1) {
2561                         tevent_req_error(req, errno);
2562                         goto post;
2563                 }
2564         }
2565
2566         if (do_bind) {
2567                 ret = bind(state->fd, &lbsda->u.sa, lbsda->sa_socklen);
2568                 if (ret == -1) {
2569                         tevent_req_error(req, errno);
2570                         goto post;
2571                 }
2572         }
2573
2574         if (rbsda->u.sa.sa_family != sa_fam) {
2575                 tevent_req_error(req, EINVAL);
2576                 goto post;
2577         }
2578
2579         ret = connect(state->fd, &rbsda->u.sa, rbsda->sa_socklen);
2580         if (ret == -1) {
2581                 if (errno == EINPROGRESS) {
2582                         goto async;
2583                 }
2584                 tevent_req_error(req, errno);
2585                 goto post;
2586         }
2587
2588         if (!state->local) {
2589                 tevent_req_done(req);
2590                 goto post;
2591         }
2592
2593         if (lrbsda != NULL) {
2594                 ret = getsockname(state->fd,
2595                                   &lrbsda->u.sa,
2596                                   &lrbsda->sa_socklen);
2597                 if (ret == -1) {
2598                         tevent_req_error(req, errno);
2599                         goto post;
2600                 }
2601         }
2602
2603         tevent_req_done(req);
2604         goto post;
2605
2606  async:
2607
2608         /*
2609          * Note for historic reasons TEVENT_FD_WRITE is not enough
2610          * to get notified for POLLERR or EPOLLHUP even if they
2611          * come together with POLLOUT. That means we need to
2612          * use TEVENT_FD_READ in addition until we have
2613          * TEVENT_FD_ERROR.
2614          */
2615         state->fde = tevent_add_fd(ev, state,
2616                                    state->fd,
2617                                    TEVENT_FD_ERROR | TEVENT_FD_WRITE,
2618                                    tstream_bsd_connect_fde_handler,
2619                                    req);
2620         if (tevent_req_nomem(state->fde, req)) {
2621                 goto post;
2622         }
2623
2624         return req;
2625
2626  post:
2627         tevent_req_post(req, ev);
2628         return req;
2629 }
2630
2631 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2632                                             struct tevent_fd *fde,
2633                                             uint16_t flags,
2634                                             void *private_data)
2635 {
2636         struct tevent_req *req = talloc_get_type_abort(private_data,
2637                                  struct tevent_req);
2638         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2639                                         struct tstream_bsd_connect_state);
2640         struct samba_sockaddr *lrbsda = NULL;
2641         int ret;
2642         int err;
2643         bool retry;
2644
2645         ret = samba_socket_sock_error(state->fd);
2646         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2647         if (retry) {
2648                 /* retry later */
2649                 return;
2650         }
2651         if (tevent_req_error(req, err)) {
2652                 return;
2653         }
2654
2655         if (!state->local) {
2656                 tevent_req_done(req);
2657                 return;
2658         }
2659
2660         lrbsda = talloc_get_type_abort(state->local->private_data,
2661                                        struct samba_sockaddr);
2662
2663         ret = getsockname(state->fd, &lrbsda->u.sa, &lrbsda->sa_socklen);
2664         if (ret == -1) {
2665                 tevent_req_error(req, errno);
2666                 return;
2667         }
2668
2669         tevent_req_done(req);
2670 }
2671
2672 static int tstream_bsd_connect_recv(struct tevent_req *req,
2673                                     int *perrno,
2674                                     TALLOC_CTX *mem_ctx,
2675                                     struct tstream_context **stream,
2676                                     struct tsocket_address **local,
2677                                     const char *location)
2678 {
2679         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2680                                         struct tstream_bsd_connect_state);
2681         int ret;
2682
2683         ret = tsocket_simple_int_recv(req, perrno);
2684         if (ret == 0) {
2685                 ret = _tstream_bsd_existing_socket(mem_ctx,
2686                                                    state->fd,
2687                                                    stream,
2688                                                    location);
2689                 if (ret == -1) {
2690                         *perrno = errno;
2691                         goto done;
2692                 }
2693                 TALLOC_FREE(state->fde);
2694                 state->fd = -1;
2695
2696                 if (local) {
2697                         *local = talloc_move(mem_ctx, &state->local);
2698                 }
2699         }
2700
2701 done:
2702         tevent_req_received(req);
2703         return ret;
2704 }
2705
2706 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2707                                         struct tevent_context *ev,
2708                                         const struct tsocket_address *local,
2709                                         const struct tsocket_address *remote)
2710 {
2711         struct samba_sockaddr *lbsda =
2712                 talloc_get_type_abort(local->private_data,
2713                 struct samba_sockaddr);
2714         struct tevent_req *req;
2715         int sys_errno = 0;
2716
2717         switch (lbsda->u.sa.sa_family) {
2718         case AF_INET:
2719                 break;
2720 #ifdef HAVE_IPV6
2721         case AF_INET6:
2722                 break;
2723 #endif
2724         default:
2725                 sys_errno = EINVAL;
2726                 break;
2727         }
2728
2729         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2730
2731         return req;
2732 }
2733
2734 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2735                                    int *perrno,
2736                                    TALLOC_CTX *mem_ctx,
2737                                    struct tstream_context **stream,
2738                                    struct tsocket_address **local,
2739                                    const char *location)
2740 {
2741         return tstream_bsd_connect_recv(req, perrno,
2742                                         mem_ctx, stream, local,
2743                                         location);
2744 }
2745
2746 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2747                                         struct tevent_context *ev,
2748                                         const struct tsocket_address *local,
2749                                         const struct tsocket_address *remote)
2750 {
2751         struct samba_sockaddr *lbsda =
2752                 talloc_get_type_abort(local->private_data,
2753                 struct samba_sockaddr);
2754         struct tevent_req *req;
2755         int sys_errno = 0;
2756
2757         switch (lbsda->u.sa.sa_family) {
2758         case AF_UNIX:
2759                 break;
2760         default:
2761                 sys_errno = EINVAL;
2762                 break;
2763         }
2764
2765         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2766
2767         return req;
2768 }
2769
2770 int _tstream_unix_connect_recv(struct tevent_req *req,
2771                                       int *perrno,
2772                                       TALLOC_CTX *mem_ctx,
2773                                       struct tstream_context **stream,
2774                                       const char *location)
2775 {
2776         return tstream_bsd_connect_recv(req, perrno,
2777                                         mem_ctx, stream, NULL,
2778                                         location);
2779 }
2780
2781 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2782                              struct tstream_context **_stream1,
2783                              TALLOC_CTX *mem_ctx2,
2784                              struct tstream_context **_stream2,
2785                              const char *location)
2786 {
2787         int ret;
2788         int fds[2];
2789         int fd1;
2790         int fd2;
2791         struct tstream_context *stream1 = NULL;
2792         struct tstream_context *stream2 = NULL;
2793
2794         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2795         if (ret == -1) {
2796                 return -1;
2797         }
2798         fd1 = fds[0];
2799         fd2 = fds[1];
2800
2801         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2802         if (fd1 == -1) {
2803                 int sys_errno = errno;
2804                 close(fd2);
2805                 errno = sys_errno;
2806                 return -1;
2807         }
2808
2809         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2810         if (fd2 == -1) {
2811                 int sys_errno = errno;
2812                 close(fd1);
2813                 errno = sys_errno;
2814                 return -1;
2815         }
2816
2817         ret = _tstream_bsd_existing_socket(mem_ctx1,
2818                                            fd1,
2819                                            &stream1,
2820                                            location);
2821         if (ret == -1) {
2822                 int sys_errno = errno;
2823                 close(fd1);
2824                 close(fd2);
2825                 errno = sys_errno;
2826                 return -1;
2827         }
2828
2829         ret = _tstream_bsd_existing_socket(mem_ctx2,
2830                                            fd2,
2831                                            &stream2,
2832                                            location);
2833         if (ret == -1) {
2834                 int sys_errno = errno;
2835                 talloc_free(stream1);
2836                 close(fd2);
2837                 errno = sys_errno;
2838                 return -1;
2839         }
2840
2841         *_stream1 = stream1;
2842         *_stream2 = stream2;
2843         return 0;
2844 }
2845