tsocket: allow AF_UNIX sockaddrs smaller than sizeof(sockaddr_un)
[samba.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tsocket
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Translate the outcome of a system call into an errno style code.
 *
 * ret       - return value of the system call
 * sys_errno - errno as captured right after the call
 * retry     - set to true when the failure is transient (EINTR,
 *             EINPROGRESS, EAGAIN or EWOULDBLOCK) and the operation
 *             should simply be tried again later, false otherwise
 *
 * Returns 0 if ret is not negative, EIO if the failure can not be
 * described (ret is something other than -1, or sys_errno is 0) and
 * sys_errno in all other cases.
 */
static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	bool transient;

	*retry = false;

	if (ret >= 0) {
		return 0;
	}

	if (ret != -1 || sys_errno == 0) {
		return EIO;
	}

	/*
	 * EAGAIN and EWOULDBLOCK may share one value, so this is written
	 * as a chain of comparisons instead of a switch statement.
	 */
	transient = (sys_errno == EINTR) ||
		    (sys_errno == EINPROGRESS) ||
		    (sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
	transient = transient || (sys_errno == EWOULDBLOCK);
#endif

	*retry = transient;
	return sys_errno;
}
72
/*
 * Prepare a freshly created file descriptor for use by tsocket.
 *
 * If high_fd is true the descriptor is dup()'ed until its number is at
 * least 3 and the low numbered originals are closed. Afterwards the
 * descriptor is switched to non-blocking mode and, where FD_CLOEXEC is
 * available, marked close-on-exec.
 *
 * Returns the (possibly new) descriptor, or -1 with errno set. On
 * failure the descriptor has been closed. Passing -1 returns -1 right
 * away without touching errno.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
#if defined(O_NONBLOCK)
	const int nonblock_flag = O_NONBLOCK;
#elif defined(SYSV)
	const int nonblock_flag = O_NDELAY;
#else /* BSD */
	const int nonblock_flag = FNDELAY;
#endif
	int low_fds[3];
	int num_low = 0;
	int saved_errno = 0;
	int fl;
	int idx;

	if (fd == -1) {
		return -1;
	}

	if (high_fd) {
		/* move the descriptor out of the stdin/stdout/stderr range */
		while (fd < 3) {
			low_fds[num_low] = fd;
			num_low += 1;

			fd = dup(fd);
			if (fd == -1) {
				/* close() below may overwrite errno */
				saved_errno = errno;
				break;
			}
		}

		for (idx = 0; idx < num_low; idx++) {
			close(low_fds[idx]);
		}

		if (fd == -1) {
			errno = saved_errno;
			return -1;
		}
	}

	/* the descriptor has to be non-blocking */
	fl = fcntl(fd, F_GETFL);
	if (fl == -1) {
		goto fail;
	}
	if (fcntl(fd, F_SETFL, fl | nonblock_flag) == -1) {
		goto fail;
	}

#ifdef FD_CLOEXEC
	/* the descriptor must not leak into exec()'ed children */
	fl = fcntl(fd, F_GETFD, 0);
	if (fl < 0) {
		goto fail;
	}
	if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
		goto fail;
	}
#endif
	return fd;

 fail:
	/* keep the errno of the failing fcntl() across close() */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
149
/*
 * Report how many bytes can be read from fd without blocking.
 *
 * Returns the byte count reported by FIONREAD when it is not zero.
 * When nothing is readable the pending socket error (SO_ERROR) is
 * checked: 0 is returned if there is none, otherwise -1 with errno set
 * to that error. -1 is also returned, with errno set, if ioctl() or
 * getsockopt() fail.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
	int avail = 0;
	int so_error = 0;
	socklen_t so_len = sizeof(so_error);
	int rc;

	rc = ioctl(fd, FIONREAD, &avail);
	if (rc == -1) {
		return -1;
	}
	if (rc != 0) {
		/* this should not be reached */
		errno = EIO;
		return -1;
	}

	if (avail != 0) {
		return avail;
	}

	/*
	 * Nothing to read: find out whether the socket is in an error
	 * state. For connected dgram sockets this is how ICMP errors
	 * get reported to the caller.
	 */
	rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_len);
	if (rc == -1) {
		return -1;
	}
	if (so_error != 0) {
		errno = so_error;
		return -1;
	}

	return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
/*
 * Private data of a "bsd" tsocket_address: one socket address, kept in
 * a union so it can be accessed through the view matching its family.
 */
struct tsocket_address_bsd {
	union {
		/* generic view, used to read sa_family */
		struct sockaddr sa;
		/* AF_INET view */
		struct sockaddr_in in;
#ifdef HAVE_IPV6
		/* AF_INET6 view */
		struct sockaddr_in6 in6;
#endif
		/* AF_UNIX view */
		struct sockaddr_un un;
		/* source/target when the whole address is copied */
		struct sockaddr_storage ss;
	} u;
};
203
204 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
205                                        struct sockaddr *sa,
206                                        size_t sa_socklen,
207                                        struct tsocket_address **_addr,
208                                        const char *location)
209 {
210         struct tsocket_address *addr;
211         struct tsocket_address_bsd *bsda;
212
213         if (sa_socklen < sizeof(sa->sa_family)) {
214                 errno = EINVAL;
215                 return -1;
216         }
217
218         switch (sa->sa_family) {
219         case AF_UNIX:
220                 break;
221         case AF_INET:
222                 if (sa_socklen < sizeof(struct sockaddr_in)) {
223                         errno = EINVAL;
224                         return -1;
225                 }
226                 break;
227 #ifdef HAVE_IPV6
228         case AF_INET6:
229                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
230                         errno = EINVAL;
231                         return -1;
232                 }
233                 break;
234 #endif
235         default:
236                 errno = EAFNOSUPPORT;
237                 return -1;
238         }
239
240         if (sa_socklen > sizeof(struct sockaddr_storage)) {
241                 errno = EINVAL;
242                 return -1;
243         }
244
245         addr = tsocket_address_create(mem_ctx,
246                                       &tsocket_address_bsd_ops,
247                                       &bsda,
248                                       struct tsocket_address_bsd,
249                                       location);
250         if (!addr) {
251                 errno = ENOMEM;
252                 return -1;
253         }
254
255         ZERO_STRUCTP(bsda);
256
257         memcpy(&bsda->u.ss, sa, sa_socklen);
258
259         *_addr = addr;
260         return 0;
261 }
262
263 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
264                                      struct sockaddr *sa,
265                                      size_t sa_socklen)
266 {
267         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
268                                            struct tsocket_address_bsd);
269         ssize_t rlen = 0;
270
271         if (!bsda) {
272                 errno = EINVAL;
273                 return -1;
274         }
275
276         switch (bsda->u.sa.sa_family) {
277         case AF_UNIX:
278                 rlen = sizeof(struct sockaddr_un);
279                 break;
280         case AF_INET:
281                 rlen = sizeof(struct sockaddr_in);
282                 break;
283 #ifdef HAVE_IPV6
284         case AF_INET6:
285                 rlen = sizeof(struct sockaddr_in6);
286                 break;
287 #endif
288         default:
289                 errno = EAFNOSUPPORT;
290                 return -1;
291         }
292
293         if (sa_socklen < rlen) {
294                 errno = EINVAL;
295                 return -1;
296         }
297
298         if (sa_socklen > sizeof(struct sockaddr_storage)) {
299                 memset(sa, 0, sa_socklen);
300                 sa_socklen = sizeof(struct sockaddr_storage);
301         }
302
303         memcpy(sa, &bsda->u.ss, sa_socklen);
304         return rlen;
305 }
306
307 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
308                                        const char *fam,
309                                        const char *addr,
310                                        uint16_t port,
311                                        struct tsocket_address **_addr,
312                                        const char *location)
313 {
314         struct addrinfo hints;
315         struct addrinfo *result = NULL;
316         char port_str[6];
317         int ret;
318
319         ZERO_STRUCT(hints);
320         /*
321          * we use SOCKET_STREAM here to get just one result
322          * back from getaddrinfo().
323          */
324         hints.ai_socktype = SOCK_STREAM;
325         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
326
327         if (strcasecmp(fam, "ip") == 0) {
328                 hints.ai_family = AF_UNSPEC;
329                 if (!addr) {
330 #ifdef HAVE_IPV6
331                         addr = "::";
332 #else
333                         addr = "0.0.0.0";
334 #endif
335                 }
336         } else if (strcasecmp(fam, "ipv4") == 0) {
337                 hints.ai_family = AF_INET;
338                 if (!addr) {
339                         addr = "0.0.0.0";
340                 }
341 #ifdef HAVE_IPV6
342         } else if (strcasecmp(fam, "ipv6") == 0) {
343                 hints.ai_family = AF_INET6;
344                 if (!addr) {
345                         addr = "::";
346                 }
347 #endif
348         } else {
349                 errno = EAFNOSUPPORT;
350                 return -1;
351         }
352
353         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
354
355         ret = getaddrinfo(addr, port_str, &hints, &result);
356         if (ret != 0) {
357                 switch (ret) {
358                 case EAI_FAIL:
359                         errno = EINVAL;
360                         break;
361                 }
362                 ret = -1;
363                 goto done;
364         }
365
366         if (result->ai_socktype != SOCK_STREAM) {
367                 errno = EINVAL;
368                 ret = -1;
369                 goto done;
370         }
371
372         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
373                                                   result->ai_addr,
374                                                   result->ai_addrlen,
375                                                   _addr,
376                                                   location);
377
378 done:
379         if (result) {
380                 freeaddrinfo(result);
381         }
382         return ret;
383 }
384
385 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
386                                        TALLOC_CTX *mem_ctx)
387 {
388         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
389                                            struct tsocket_address_bsd);
390         char addr_str[INET6_ADDRSTRLEN+1];
391         const char *str;
392
393         if (!bsda) {
394                 errno = EINVAL;
395                 return NULL;
396         }
397
398         switch (bsda->u.sa.sa_family) {
399         case AF_INET:
400                 str = inet_ntop(bsda->u.in.sin_family,
401                                 &bsda->u.in.sin_addr,
402                                 addr_str, sizeof(addr_str));
403                 break;
404 #ifdef HAVE_IPV6
405         case AF_INET6:
406                 str = inet_ntop(bsda->u.in6.sin6_family,
407                                 &bsda->u.in6.sin6_addr,
408                                 addr_str, sizeof(addr_str));
409                 break;
410 #endif
411         default:
412                 errno = EINVAL;
413                 return NULL;
414         }
415
416         if (!str) {
417                 return NULL;
418         }
419
420         return talloc_strdup(mem_ctx, str);
421 }
422
423 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
424 {
425         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
426                                            struct tsocket_address_bsd);
427         uint16_t port = 0;
428
429         if (!bsda) {
430                 errno = EINVAL;
431                 return 0;
432         }
433
434         switch (bsda->u.sa.sa_family) {
435         case AF_INET:
436                 port = ntohs(bsda->u.in.sin_port);
437                 break;
438 #ifdef HAVE_IPV6
439         case AF_INET6:
440                 port = ntohs(bsda->u.in6.sin6_port);
441                 break;
442 #endif
443         default:
444                 errno = EINVAL;
445                 return 0;
446         }
447
448         return port;
449 }
450
451 int tsocket_address_inet_set_port(struct tsocket_address *addr,
452                                   uint16_t port)
453 {
454         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
455                                            struct tsocket_address_bsd);
456
457         if (!bsda) {
458                 errno = EINVAL;
459                 return -1;
460         }
461
462         switch (bsda->u.sa.sa_family) {
463         case AF_INET:
464                 bsda->u.in.sin_port = htons(port);
465                 break;
466 #ifdef HAVE_IPV6
467         case AF_INET6:
468                 bsda->u.in6.sin6_port = htons(port);
469                 break;
470 #endif
471         default:
472                 errno = EINVAL;
473                 return -1;
474         }
475
476         return 0;
477 }
478
479 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
480                                     const char *path,
481                                     struct tsocket_address **_addr,
482                                     const char *location)
483 {
484         struct sockaddr_un un;
485         void *p = &un;
486         int ret;
487
488         if (!path) {
489                 path = "";
490         }
491
492         if (strlen(path) > sizeof(un.sun_path)-1) {
493                 errno = ENAMETOOLONG;
494                 return -1;
495         }
496
497         ZERO_STRUCT(un);
498         un.sun_family = AF_UNIX;
499         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
500
501         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
502                                                  (struct sockaddr *)p,
503                                                  sizeof(un),
504                                                  _addr,
505                                                  location);
506
507         return ret;
508 }
509
510 char *tsocket_address_unix_path(const struct tsocket_address *addr,
511                                 TALLOC_CTX *mem_ctx)
512 {
513         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
514                                            struct tsocket_address_bsd);
515         const char *str;
516
517         if (!bsda) {
518                 errno = EINVAL;
519                 return NULL;
520         }
521
522         switch (bsda->u.sa.sa_family) {
523         case AF_UNIX:
524                 str = bsda->u.un.sun_path;
525                 break;
526         default:
527                 errno = EINVAL;
528                 return NULL;
529         }
530
531         return talloc_strdup(mem_ctx, str);
532 }
533
534 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
535                                         TALLOC_CTX *mem_ctx)
536 {
537         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
538                                            struct tsocket_address_bsd);
539         char *str;
540         char *addr_str;
541         const char *prefix = NULL;
542         uint16_t port;
543
544         switch (bsda->u.sa.sa_family) {
545         case AF_UNIX:
546                 return talloc_asprintf(mem_ctx, "unix:%s",
547                                        bsda->u.un.sun_path);
548         case AF_INET:
549                 prefix = "ipv4";
550                 break;
551 #ifdef HAVE_IPV6
552         case AF_INET6:
553                 prefix = "ipv6";
554                 break;
555 #endif
556         default:
557                 errno = EINVAL;
558                 return NULL;
559         }
560
561         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
562         if (!addr_str) {
563                 return NULL;
564         }
565
566         port = tsocket_address_inet_port(addr);
567
568         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
569                               prefix, addr_str, port);
570         talloc_free(addr_str);
571
572         return str;
573 }
574
575 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
576                                                          TALLOC_CTX *mem_ctx,
577                                                          const char *location)
578 {
579         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
580                                            struct tsocket_address_bsd);
581         struct tsocket_address *copy;
582         int ret;
583
584         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
585                                                  &bsda->u.sa,
586                                                  sizeof(bsda->u.ss),
587                                                  &copy,
588                                                  location);
589         if (ret != 0) {
590                 return NULL;
591         }
592
593         return copy;
594 }
595
/*
 * Operations table handed to tsocket_address_create() for every address
 * created in this file.
 */
static const struct tsocket_address_ops tsocket_address_bsd_ops = {
	.name		= "bsd",
	.string		= tsocket_address_bsd_string,
	.copy		= tsocket_address_bsd_copy,
};
601
/* Private state of a datagram context backed by a BSD socket. */
struct tdgram_bsd {
	/* the socket; requests fail with ENOTCONN while this is -1 */
	int fd;

	/* the tevent context fde was registered on, see the set_*_handler functions */
	void *event_ptr;
	/* fd event dispatched through tdgram_bsd_fde_handler() */
	struct tevent_fd *fde;

	/* callback and its argument invoked when the fd event reports TEVENT_FD_READ */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback and its argument invoked when the fd event reports TEVENT_FD_WRITE */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
613
614 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
615                                    struct tevent_fd *fde,
616                                    uint16_t flags,
617                                    void *private_data)
618 {
619         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
620                                   struct tdgram_bsd);
621
622         if (flags & TEVENT_FD_WRITE) {
623                 bsds->writeable_handler(bsds->writeable_private);
624                 return;
625         }
626         if (flags & TEVENT_FD_READ) {
627                 if (!bsds->readable_handler) {
628                         TEVENT_FD_NOT_READABLE(bsds->fde);
629                         return;
630                 }
631                 bsds->readable_handler(bsds->readable_private);
632                 return;
633         }
634 }
635
636 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
637                                            struct tevent_context *ev,
638                                            void (*handler)(void *private_data),
639                                            void *private_data)
640 {
641         if (ev == NULL) {
642                 if (handler) {
643                         errno = EINVAL;
644                         return -1;
645                 }
646                 if (!bsds->readable_handler) {
647                         return 0;
648                 }
649                 bsds->readable_handler = NULL;
650                 bsds->readable_private = NULL;
651
652                 return 0;
653         }
654
655         /* read and write must use the same tevent_context */
656         if (bsds->event_ptr != ev) {
657                 if (bsds->readable_handler || bsds->writeable_handler) {
658                         errno = EINVAL;
659                         return -1;
660                 }
661                 bsds->event_ptr = NULL;
662                 TALLOC_FREE(bsds->fde);
663         }
664
665         if (tevent_fd_get_flags(bsds->fde) == 0) {
666                 TALLOC_FREE(bsds->fde);
667
668                 bsds->fde = tevent_add_fd(ev, bsds,
669                                           bsds->fd, TEVENT_FD_READ,
670                                           tdgram_bsd_fde_handler,
671                                           bsds);
672                 if (!bsds->fde) {
673                         errno = ENOMEM;
674                         return -1;
675                 }
676
677                 /* cache the event context we're running on */
678                 bsds->event_ptr = ev;
679         } else if (!bsds->readable_handler) {
680                 TEVENT_FD_READABLE(bsds->fde);
681         }
682
683         bsds->readable_handler = handler;
684         bsds->readable_private = private_data;
685
686         return 0;
687 }
688
689 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
690                                             struct tevent_context *ev,
691                                             void (*handler)(void *private_data),
692                                             void *private_data)
693 {
694         if (ev == NULL) {
695                 if (handler) {
696                         errno = EINVAL;
697                         return -1;
698                 }
699                 if (!bsds->writeable_handler) {
700                         return 0;
701                 }
702                 bsds->writeable_handler = NULL;
703                 bsds->writeable_private = NULL;
704                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
705
706                 return 0;
707         }
708
709         /* read and write must use the same tevent_context */
710         if (bsds->event_ptr != ev) {
711                 if (bsds->readable_handler || bsds->writeable_handler) {
712                         errno = EINVAL;
713                         return -1;
714                 }
715                 bsds->event_ptr = NULL;
716                 TALLOC_FREE(bsds->fde);
717         }
718
719         if (tevent_fd_get_flags(bsds->fde) == 0) {
720                 TALLOC_FREE(bsds->fde);
721
722                 bsds->fde = tevent_add_fd(ev, bsds,
723                                           bsds->fd, TEVENT_FD_WRITE,
724                                           tdgram_bsd_fde_handler,
725                                           bsds);
726                 if (!bsds->fde) {
727                         errno = ENOMEM;
728                         return -1;
729                 }
730
731                 /* cache the event context we're running on */
732                 bsds->event_ptr = ev;
733         } else if (!bsds->writeable_handler) {
734                 TEVENT_FD_WRITEABLE(bsds->fde);
735         }
736
737         bsds->writeable_handler = handler;
738         bsds->writeable_private = private_data;
739
740         return 0;
741 }
742
/* State of one pending recvfrom request. */
struct tdgram_bsd_recvfrom_state {
	/* the datagram context the request was started on */
	struct tdgram_context *dgram;

	/* receive buffer, allocated once the size of the datagram is known */
	uint8_t *buf;
	/* size of buf in bytes */
	size_t len;
	/* address object handed to recvfrom() to receive the sender address */
	struct tsocket_address *src;
};
750
751 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
752 {
753         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
754                                   struct tdgram_bsd);
755
756         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
757
758         return 0;
759 }
760
761 static void tdgram_bsd_recvfrom_handler(void *private_data);
762
763 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
764                                         struct tevent_context *ev,
765                                         struct tdgram_context *dgram)
766 {
767         struct tevent_req *req;
768         struct tdgram_bsd_recvfrom_state *state;
769         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
770         int ret;
771
772         req = tevent_req_create(mem_ctx, &state,
773                                 struct tdgram_bsd_recvfrom_state);
774         if (!req) {
775                 return NULL;
776         }
777
778         state->dgram    = dgram;
779         state->buf      = NULL;
780         state->len      = 0;
781         state->src      = NULL;
782
783         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
784
785         if (bsds->fd == -1) {
786                 tevent_req_error(req, ENOTCONN);
787                 goto post;
788         }
789
790         /*
791          * this is a fast path, not waiting for the
792          * socket to become explicit readable gains
793          * about 10%-20% performance in benchmark tests.
794          */
795         tdgram_bsd_recvfrom_handler(req);
796         if (!tevent_req_is_in_progress(req)) {
797                 goto post;
798         }
799
800         ret = tdgram_bsd_set_readable_handler(bsds, ev,
801                                               tdgram_bsd_recvfrom_handler,
802                                               req);
803         if (ret == -1) {
804                 tevent_req_error(req, errno);
805                 goto post;
806         }
807
808         return req;
809
810  post:
811         tevent_req_post(req, ev);
812         return req;
813 }
814
815 static void tdgram_bsd_recvfrom_handler(void *private_data)
816 {
817         struct tevent_req *req = talloc_get_type_abort(private_data,
818                                  struct tevent_req);
819         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
820                                         struct tdgram_bsd_recvfrom_state);
821         struct tdgram_context *dgram = state->dgram;
822         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
823         struct tsocket_address_bsd *bsda;
824         ssize_t ret;
825         struct sockaddr *sa = NULL;
826         socklen_t sa_socklen = 0;
827         int err;
828         bool retry;
829
830         ret = tsocket_bsd_pending(bsds->fd);
831         if (ret == 0) {
832                 /* retry later */
833                 return;
834         }
835         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
836         if (retry) {
837                 /* retry later */
838                 return;
839         }
840         if (tevent_req_error(req, err)) {
841                 return;
842         }
843
844         state->buf = talloc_array(state, uint8_t, ret);
845         if (tevent_req_nomem(state->buf, req)) {
846                 return;
847         }
848         state->len = ret;
849
850         state->src = tsocket_address_create(state,
851                                             &tsocket_address_bsd_ops,
852                                             &bsda,
853                                             struct tsocket_address_bsd,
854                                             __location__ "bsd_recvfrom");
855         if (tevent_req_nomem(state->src, req)) {
856                 return;
857         }
858
859         ZERO_STRUCTP(bsda);
860
861         sa = &bsda->u.sa;
862         sa_socklen = sizeof(bsda->u.ss);
863         /*
864          * for unix sockets we can't use the size of sockaddr_storage
865          * we would get EINVAL
866          */
867         if (bsda->u.sa.sa_family == AF_UNIX) {
868                 sa_socklen = sizeof(bsda->u.un);
869         }
870
871         ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_socklen);
872         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
873         if (retry) {
874                 /* retry later */
875                 return;
876         }
877         if (tevent_req_error(req, err)) {
878                 return;
879         }
880
881         if (ret != state->len) {
882                 tevent_req_error(req, EIO);
883                 return;
884         }
885
886         tevent_req_done(req);
887 }
888
889 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
890                                         int *perrno,
891                                         TALLOC_CTX *mem_ctx,
892                                         uint8_t **buf,
893                                         struct tsocket_address **src)
894 {
895         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
896                                         struct tdgram_bsd_recvfrom_state);
897         ssize_t ret;
898
899         ret = tsocket_simple_int_recv(req, perrno);
900         if (ret == 0) {
901                 *buf = talloc_move(mem_ctx, &state->buf);
902                 ret = state->len;
903                 if (src) {
904                         *src = talloc_move(mem_ctx, &state->src);
905                 }
906         }
907
908         tevent_req_received(req);
909         return ret;
910 }
911
/* State of one pending sendto request. */
struct tdgram_bsd_sendto_state {
	/* the datagram context the request was started on */
	struct tdgram_context *dgram;

	/* caller supplied data to send; the pointer is stored, not copied */
	const uint8_t *buf;
	/* length of buf in bytes */
	size_t len;
	/* destination address as given by the caller, may be NULL */
	const struct tsocket_address *dst;

	/* initialised to -1 when the request is created; presumably the result of the send - TODO confirm */
	ssize_t ret;
};
921
922 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
923 {
924         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
925                                   struct tdgram_bsd);
926
927         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
928
929         return 0;
930 }
931
932 static void tdgram_bsd_sendto_handler(void *private_data);
933
934 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
935                                                  struct tevent_context *ev,
936                                                  struct tdgram_context *dgram,
937                                                  const uint8_t *buf,
938                                                  size_t len,
939                                                  const struct tsocket_address *dst)
940 {
941         struct tevent_req *req;
942         struct tdgram_bsd_sendto_state *state;
943         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
944         int ret;
945
946         req = tevent_req_create(mem_ctx, &state,
947                                 struct tdgram_bsd_sendto_state);
948         if (!req) {
949                 return NULL;
950         }
951
952         state->dgram    = dgram;
953         state->buf      = buf;
954         state->len      = len;
955         state->dst      = dst;
956         state->ret      = -1;
957
958         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
959
960         if (bsds->fd == -1) {
961                 tevent_req_error(req, ENOTCONN);
962                 goto post;
963         }
964
965         /*
966          * this is a fast path, not waiting for the
967          * socket to become explicit writeable gains
968          * about 10%-20% performance in benchmark tests.
969          */
970         tdgram_bsd_sendto_handler(req);
971         if (!tevent_req_is_in_progress(req)) {
972                 goto post;
973         }
974
975         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
976                                                tdgram_bsd_sendto_handler,
977                                                req);
978         if (ret == -1) {
979                 tevent_req_error(req, errno);
980                 goto post;
981         }
982
983         return req;
984
985  post:
986         tevent_req_post(req, ev);
987         return req;
988 }
989
990 static void tdgram_bsd_sendto_handler(void *private_data)
991 {
992         struct tevent_req *req = talloc_get_type_abort(private_data,
993                                  struct tevent_req);
994         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
995                                         struct tdgram_bsd_sendto_state);
996         struct tdgram_context *dgram = state->dgram;
997         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
998         struct sockaddr *sa = NULL;
999         socklen_t sa_socklen = 0;
1000         ssize_t ret;
1001         int err;
1002         bool retry;
1003
1004         if (state->dst) {
1005                 struct tsocket_address_bsd *bsda =
1006                         talloc_get_type(state->dst->private_data,
1007                         struct tsocket_address_bsd);
1008
1009                 sa = &bsda->u.sa;
1010                 sa_socklen = sizeof(bsda->u.ss);
1011                 /*
1012                  * for unix sockets we can't use the size of sockaddr_storage
1013                  * we would get EINVAL
1014                  */
1015                 if (bsda->u.sa.sa_family == AF_UNIX) {
1016                         sa_socklen = sizeof(bsda->u.un);
1017                 }
1018         }
1019
1020         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1021         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1022         if (retry) {
1023                 /* retry later */
1024                 return;
1025         }
1026         if (tevent_req_error(req, err)) {
1027                 return;
1028         }
1029
1030         state->ret = ret;
1031
1032         tevent_req_done(req);
1033 }
1034
1035 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1036 {
1037         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1038                                         struct tdgram_bsd_sendto_state);
1039         ssize_t ret;
1040
1041         ret = tsocket_simple_int_recv(req, perrno);
1042         if (ret == 0) {
1043                 ret = state->ret;
1044         }
1045
1046         tevent_req_received(req);
1047         return ret;
1048 }
1049
/*
 * State of a datagram disconnect request. Nothing has to be remembered,
 * the single member only keeps the struct from being empty.
 */
struct tdgram_bsd_disconnect_state {
        uint8_t __dummy;
};
1053
1054 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1055                                                      struct tevent_context *ev,
1056                                                      struct tdgram_context *dgram)
1057 {
1058         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1059         struct tevent_req *req;
1060         struct tdgram_bsd_disconnect_state *state;
1061         int ret;
1062         int err;
1063         bool dummy;
1064
1065         req = tevent_req_create(mem_ctx, &state,
1066                                 struct tdgram_bsd_disconnect_state);
1067         if (req == NULL) {
1068                 return NULL;
1069         }
1070
1071         if (bsds->fd == -1) {
1072                 tevent_req_error(req, ENOTCONN);
1073                 goto post;
1074         }
1075
1076         ret = close(bsds->fd);
1077         bsds->fd = -1;
1078         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1079         if (tevent_req_error(req, err)) {
1080                 goto post;
1081         }
1082
1083         tevent_req_done(req);
1084 post:
1085         tevent_req_post(req, ev);
1086         return req;
1087 }
1088
/*
 * Collect the result of a datagram disconnect request.
 *
 * Returns whatever tsocket_simple_int_recv() reports for the request;
 * perrno is passed through to it.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1099
1100 static const struct tdgram_context_ops tdgram_bsd_ops = {
1101         .name                   = "bsd",
1102
1103         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1104         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1105
1106         .sendto_send            = tdgram_bsd_sendto_send,
1107         .sendto_recv            = tdgram_bsd_sendto_recv,
1108
1109         .disconnect_send        = tdgram_bsd_disconnect_send,
1110         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1111 };
1112
1113 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1114 {
1115         TALLOC_FREE(bsds->fde);
1116         if (bsds->fd != -1) {
1117                 close(bsds->fd);
1118                 bsds->fd = -1;
1119         }
1120         return 0;
1121 }
1122
1123 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1124                                    const struct tsocket_address *remote,
1125                                    bool broadcast,
1126                                    TALLOC_CTX *mem_ctx,
1127                                    struct tdgram_context **_dgram,
1128                                    const char *location)
1129 {
1130         struct tsocket_address_bsd *lbsda =
1131                 talloc_get_type_abort(local->private_data,
1132                 struct tsocket_address_bsd);
1133         struct tsocket_address_bsd *rbsda = NULL;
1134         struct tdgram_context *dgram;
1135         struct tdgram_bsd *bsds;
1136         int fd;
1137         int ret;
1138         bool do_bind = false;
1139         bool do_reuseaddr = false;
1140         socklen_t sa_socklen = sizeof(lbsda->u.ss);
1141
1142         if (remote) {
1143                 rbsda = talloc_get_type_abort(remote->private_data,
1144                         struct tsocket_address_bsd);
1145         }
1146
1147         switch (lbsda->u.sa.sa_family) {
1148         case AF_UNIX:
1149                 if (broadcast) {
1150                         errno = EINVAL;
1151                         return -1;
1152                 }
1153                 if (lbsda->u.un.sun_path[0] != 0) {
1154                         do_reuseaddr = true;
1155                         do_bind = true;
1156                 }
1157                 /*
1158                  * for unix sockets we can't use the size of sockaddr_storage
1159                  * we would get EINVAL
1160                  */
1161                 sa_socklen = sizeof(lbsda->u.un);
1162                 break;
1163         case AF_INET:
1164                 if (lbsda->u.in.sin_port != 0) {
1165                         do_reuseaddr = true;
1166                         do_bind = true;
1167                 }
1168                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1169                         do_bind = true;
1170                 }
1171                 break;
1172 #ifdef HAVE_IPV6
1173         case AF_INET6:
1174                 if (lbsda->u.in6.sin6_port != 0) {
1175                         do_reuseaddr = true;
1176                         do_bind = true;
1177                 }
1178                 if (memcmp(&in6addr_any,
1179                            &lbsda->u.in6.sin6_addr,
1180                            sizeof(in6addr_any)) != 0) {
1181                         do_bind = true;
1182                 }
1183                 break;
1184 #endif
1185         default:
1186                 errno = EINVAL;
1187                 return -1;
1188         }
1189
1190         fd = socket(lbsda->u.sa.sa_family, SOCK_DGRAM, 0);
1191         if (fd < 0) {
1192                 return fd;
1193         }
1194
1195         fd = tsocket_bsd_common_prepare_fd(fd, true);
1196         if (fd < 0) {
1197                 return fd;
1198         }
1199
1200         dgram = tdgram_context_create(mem_ctx,
1201                                       &tdgram_bsd_ops,
1202                                       &bsds,
1203                                       struct tdgram_bsd,
1204                                       location);
1205         if (!dgram) {
1206                 int saved_errno = errno;
1207                 close(fd);
1208                 errno = saved_errno;
1209                 return -1;
1210         }
1211         ZERO_STRUCTP(bsds);
1212         bsds->fd = fd;
1213         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1214
1215         if (broadcast) {
1216                 int val = 1;
1217
1218                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1219                                  (const void *)&val, sizeof(val));
1220                 if (ret == -1) {
1221                         int saved_errno = errno;
1222                         talloc_free(dgram);
1223                         errno = saved_errno;
1224                         return ret;
1225                 }
1226         }
1227
1228         if (do_reuseaddr) {
1229                 int val = 1;
1230
1231                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1232                                  (const void *)&val, sizeof(val));
1233                 if (ret == -1) {
1234                         int saved_errno = errno;
1235                         talloc_free(dgram);
1236                         errno = saved_errno;
1237                         return ret;
1238                 }
1239         }
1240
1241         if (do_bind) {
1242                 ret = bind(fd, &lbsda->u.sa, sa_socklen);
1243                 if (ret == -1) {
1244                         int saved_errno = errno;
1245                         talloc_free(dgram);
1246                         errno = saved_errno;
1247                         return ret;
1248                 }
1249         }
1250
1251         if (rbsda) {
1252                 ret = connect(fd, &rbsda->u.sa, sa_socklen);
1253                 if (ret == -1) {
1254                         int saved_errno = errno;
1255                         talloc_free(dgram);
1256                         errno = saved_errno;
1257                         return ret;
1258                 }
1259         }
1260
1261         *_dgram = dgram;
1262         return 0;
1263 }
1264
1265 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1266                             const struct tsocket_address *remote,
1267                             TALLOC_CTX *mem_ctx,
1268                             struct tdgram_context **dgram,
1269                             const char *location)
1270 {
1271         struct tsocket_address_bsd *lbsda =
1272                 talloc_get_type_abort(local->private_data,
1273                 struct tsocket_address_bsd);
1274         int ret;
1275
1276         switch (lbsda->u.sa.sa_family) {
1277         case AF_INET:
1278                 break;
1279 #ifdef HAVE_IPV6
1280         case AF_INET6:
1281                 break;
1282 #endif
1283         default:
1284                 errno = EINVAL;
1285                 return -1;
1286         }
1287
1288         ret = tdgram_bsd_dgram_socket(local, remote, false,
1289                                       mem_ctx, dgram, location);
1290
1291         return ret;
1292 }
1293
1294 int _tdgram_unix_socket(const struct tsocket_address *local,
1295                         const struct tsocket_address *remote,
1296                         TALLOC_CTX *mem_ctx,
1297                         struct tdgram_context **dgram,
1298                         const char *location)
1299 {
1300         struct tsocket_address_bsd *lbsda =
1301                 talloc_get_type_abort(local->private_data,
1302                 struct tsocket_address_bsd);
1303         int ret;
1304
1305         switch (lbsda->u.sa.sa_family) {
1306         case AF_UNIX:
1307                 break;
1308         default:
1309                 errno = EINVAL;
1310                 return -1;
1311         }
1312
1313         ret = tdgram_bsd_dgram_socket(local, remote, false,
1314                                       mem_ctx, dgram, location);
1315
1316         return ret;
1317 }
1318
/*
 * Backend state of a bsd stream socket.
 */
struct tstream_bsd {
        /* socket descriptor, -1 once it has been closed */
        int fd;

        /* event context the fd event was created on; only compared */
        void *event_ptr;
        /* fd event watching the socket, NULL while none exists */
        struct tevent_fd *fde;

        /* callback and its argument for "socket is readable" */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback and its argument for "socket is writeable" */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
1330
1331 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1332                                     struct tevent_fd *fde,
1333                                     uint16_t flags,
1334                                     void *private_data)
1335 {
1336         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1337                                    struct tstream_bsd);
1338
1339         if (flags & TEVENT_FD_WRITE) {
1340                 bsds->writeable_handler(bsds->writeable_private);
1341                 return;
1342         }
1343         if (flags & TEVENT_FD_READ) {
1344                 if (!bsds->readable_handler) {
1345                         if (bsds->writeable_handler) {
1346                                 bsds->writeable_handler(bsds->writeable_private);
1347                                 return;
1348                         }
1349                         TEVENT_FD_NOT_READABLE(bsds->fde);
1350                         return;
1351                 }
1352                 bsds->readable_handler(bsds->readable_private);
1353                 return;
1354         }
1355 }
1356
1357 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1358                                             struct tevent_context *ev,
1359                                             void (*handler)(void *private_data),
1360                                             void *private_data)
1361 {
1362         if (ev == NULL) {
1363                 if (handler) {
1364                         errno = EINVAL;
1365                         return -1;
1366                 }
1367                 if (!bsds->readable_handler) {
1368                         return 0;
1369                 }
1370                 bsds->readable_handler = NULL;
1371                 bsds->readable_private = NULL;
1372
1373                 return 0;
1374         }
1375
1376         /* read and write must use the same tevent_context */
1377         if (bsds->event_ptr != ev) {
1378                 if (bsds->readable_handler || bsds->writeable_handler) {
1379                         errno = EINVAL;
1380                         return -1;
1381                 }
1382                 bsds->event_ptr = NULL;
1383                 TALLOC_FREE(bsds->fde);
1384         }
1385
1386         if (tevent_fd_get_flags(bsds->fde) == 0) {
1387                 TALLOC_FREE(bsds->fde);
1388
1389                 bsds->fde = tevent_add_fd(ev, bsds,
1390                                           bsds->fd, TEVENT_FD_READ,
1391                                           tstream_bsd_fde_handler,
1392                                           bsds);
1393                 if (!bsds->fde) {
1394                         errno = ENOMEM;
1395                         return -1;
1396                 }
1397
1398                 /* cache the event context we're running on */
1399                 bsds->event_ptr = ev;
1400         } else if (!bsds->readable_handler) {
1401                 TEVENT_FD_READABLE(bsds->fde);
1402         }
1403
1404         bsds->readable_handler = handler;
1405         bsds->readable_private = private_data;
1406
1407         return 0;
1408 }
1409
1410 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1411                                              struct tevent_context *ev,
1412                                              void (*handler)(void *private_data),
1413                                              void *private_data)
1414 {
1415         if (ev == NULL) {
1416                 if (handler) {
1417                         errno = EINVAL;
1418                         return -1;
1419                 }
1420                 if (!bsds->writeable_handler) {
1421                         return 0;
1422                 }
1423                 bsds->writeable_handler = NULL;
1424                 bsds->writeable_private = NULL;
1425                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1426
1427                 return 0;
1428         }
1429
1430         /* read and write must use the same tevent_context */
1431         if (bsds->event_ptr != ev) {
1432                 if (bsds->readable_handler || bsds->writeable_handler) {
1433                         errno = EINVAL;
1434                         return -1;
1435                 }
1436                 bsds->event_ptr = NULL;
1437                 TALLOC_FREE(bsds->fde);
1438         }
1439
1440         if (tevent_fd_get_flags(bsds->fde) == 0) {
1441                 TALLOC_FREE(bsds->fde);
1442
1443                 bsds->fde = tevent_add_fd(ev, bsds,
1444                                           bsds->fd,
1445                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1446                                           tstream_bsd_fde_handler,
1447                                           bsds);
1448                 if (!bsds->fde) {
1449                         errno = ENOMEM;
1450                         return -1;
1451                 }
1452
1453                 /* cache the event context we're running on */
1454                 bsds->event_ptr = ev;
1455         } else if (!bsds->writeable_handler) {
1456                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1457                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1458                 tevent_fd_set_flags(bsds->fde, flags);
1459         }
1460
1461         bsds->writeable_handler = handler;
1462         bsds->writeable_private = private_data;
1463
1464         return 0;
1465 }
1466
1467 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1468 {
1469         struct tstream_bsd *bsds = tstream_context_data(stream,
1470                                    struct tstream_bsd);
1471         ssize_t ret;
1472
1473         if (bsds->fd == -1) {
1474                 errno = ENOTCONN;
1475                 return -1;
1476         }
1477
1478         ret = tsocket_bsd_pending(bsds->fd);
1479
1480         return ret;
1481 }
1482
/*
 * Per-request state of one readv operation on a bsd stream socket.
 */
struct tstream_bsd_readv_state {
        /* stream the request was issued on */
        struct tstream_context *stream;

        /* private copy of the caller's vector; consumed while reading */
        struct iovec *vector;
        /* number of entries of vector that are still to be filled */
        size_t count;

        /* total number of bytes read so far */
        int ret;
};
1491
1492 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1493 {
1494         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1495                                    struct tstream_bsd);
1496
1497         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1498
1499         return 0;
1500 }
1501
1502 static void tstream_bsd_readv_handler(void *private_data);
1503
1504 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1505                                         struct tevent_context *ev,
1506                                         struct tstream_context *stream,
1507                                         struct iovec *vector,
1508                                         size_t count)
1509 {
1510         struct tevent_req *req;
1511         struct tstream_bsd_readv_state *state;
1512         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1513         int ret;
1514
1515         req = tevent_req_create(mem_ctx, &state,
1516                                 struct tstream_bsd_readv_state);
1517         if (!req) {
1518                 return NULL;
1519         }
1520
1521         state->stream   = stream;
1522         /* we make a copy of the vector so that we can modify it */
1523         state->vector   = talloc_array(state, struct iovec, count);
1524         if (tevent_req_nomem(state->vector, req)) {
1525                 goto post;
1526         }
1527         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1528         state->count    = count;
1529         state->ret      = 0;
1530
1531         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1532
1533         if (bsds->fd == -1) {
1534                 tevent_req_error(req, ENOTCONN);
1535                 goto post;
1536         }
1537
1538         /*
1539          * this is a fast path, not waiting for the
1540          * socket to become explicit readable gains
1541          * about 10%-20% performance in benchmark tests.
1542          */
1543         tstream_bsd_readv_handler(req);
1544         if (!tevent_req_is_in_progress(req)) {
1545                 goto post;
1546         }
1547
1548         ret = tstream_bsd_set_readable_handler(bsds, ev,
1549                                               tstream_bsd_readv_handler,
1550                                               req);
1551         if (ret == -1) {
1552                 tevent_req_error(req, errno);
1553                 goto post;
1554         }
1555
1556         return req;
1557
1558  post:
1559         tevent_req_post(req, ev);
1560         return req;
1561 }
1562
1563 static void tstream_bsd_readv_handler(void *private_data)
1564 {
1565         struct tevent_req *req = talloc_get_type_abort(private_data,
1566                                  struct tevent_req);
1567         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1568                                         struct tstream_bsd_readv_state);
1569         struct tstream_context *stream = state->stream;
1570         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1571         int ret;
1572         int err;
1573         bool retry;
1574
1575         ret = readv(bsds->fd, state->vector, state->count);
1576         if (ret == 0) {
1577                 /* propagate end of file */
1578                 tevent_req_error(req, EPIPE);
1579                 return;
1580         }
1581         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1582         if (retry) {
1583                 /* retry later */
1584                 return;
1585         }
1586         if (tevent_req_error(req, err)) {
1587                 return;
1588         }
1589
1590         state->ret += ret;
1591
1592         while (ret > 0) {
1593                 if (ret < state->vector[0].iov_len) {
1594                         uint8_t *base;
1595                         base = (uint8_t *)state->vector[0].iov_base;
1596                         base += ret;
1597                         state->vector[0].iov_base = base;
1598                         state->vector[0].iov_len -= ret;
1599                         break;
1600                 }
1601                 ret -= state->vector[0].iov_len;
1602                 state->vector += 1;
1603                 state->count -= 1;
1604         }
1605
1606         /*
1607          * there're maybe some empty vectors at the end
1608          * which we need to skip, otherwise we would get
1609          * ret == 0 from the readv() call and return EPIPE
1610          */
1611         while (state->count > 0) {
1612                 if (state->vector[0].iov_len > 0) {
1613                         break;
1614                 }
1615                 state->vector += 1;
1616                 state->count -= 1;
1617         }
1618
1619         if (state->count > 0) {
1620                 /* we have more to read */
1621                 return;
1622         }
1623
1624         tevent_req_done(req);
1625 }
1626
1627 static int tstream_bsd_readv_recv(struct tevent_req *req,
1628                                   int *perrno)
1629 {
1630         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1631                                         struct tstream_bsd_readv_state);
1632         int ret;
1633
1634         ret = tsocket_simple_int_recv(req, perrno);
1635         if (ret == 0) {
1636                 ret = state->ret;
1637         }
1638
1639         tevent_req_received(req);
1640         return ret;
1641 }
1642
/*
 * Per-request state of one writev operation on a bsd stream socket.
 */
struct tstream_bsd_writev_state {
        /* stream the request was issued on */
        struct tstream_context *stream;

        /* private copy of the caller's vector; consumed while writing */
        struct iovec *vector;
        /* number of entries of vector that are still to be written */
        size_t count;

        /* total number of bytes written so far */
        int ret;
};
1651
1652 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1653 {
1654         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1655                                   struct tstream_bsd);
1656
1657         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1658
1659         return 0;
1660 }
1661
1662 static void tstream_bsd_writev_handler(void *private_data);
1663
1664 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1665                                                  struct tevent_context *ev,
1666                                                  struct tstream_context *stream,
1667                                                  const struct iovec *vector,
1668                                                  size_t count)
1669 {
1670         struct tevent_req *req;
1671         struct tstream_bsd_writev_state *state;
1672         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1673         int ret;
1674
1675         req = tevent_req_create(mem_ctx, &state,
1676                                 struct tstream_bsd_writev_state);
1677         if (!req) {
1678                 return NULL;
1679         }
1680
1681         state->stream   = stream;
1682         /* we make a copy of the vector so that we can modify it */
1683         state->vector   = talloc_array(state, struct iovec, count);
1684         if (tevent_req_nomem(state->vector, req)) {
1685                 goto post;
1686         }
1687         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1688         state->count    = count;
1689         state->ret      = 0;
1690
1691         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1692
1693         if (bsds->fd == -1) {
1694                 tevent_req_error(req, ENOTCONN);
1695                 goto post;
1696         }
1697
1698         /*
1699          * this is a fast path, not waiting for the
1700          * socket to become explicit writeable gains
1701          * about 10%-20% performance in benchmark tests.
1702          */
1703         tstream_bsd_writev_handler(req);
1704         if (!tevent_req_is_in_progress(req)) {
1705                 goto post;
1706         }
1707
1708         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1709                                                tstream_bsd_writev_handler,
1710                                                req);
1711         if (ret == -1) {
1712                 tevent_req_error(req, errno);
1713                 goto post;
1714         }
1715
1716         return req;
1717
1718  post:
1719         tevent_req_post(req, ev);
1720         return req;
1721 }
1722
1723 static void tstream_bsd_writev_handler(void *private_data)
1724 {
1725         struct tevent_req *req = talloc_get_type_abort(private_data,
1726                                  struct tevent_req);
1727         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1728                                         struct tstream_bsd_writev_state);
1729         struct tstream_context *stream = state->stream;
1730         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1731         ssize_t ret;
1732         int err;
1733         bool retry;
1734
1735         ret = writev(bsds->fd, state->vector, state->count);
1736         if (ret == 0) {
1737                 /* propagate end of file */
1738                 tevent_req_error(req, EPIPE);
1739                 return;
1740         }
1741         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1742         if (retry) {
1743                 /* retry later */
1744                 return;
1745         }
1746         if (tevent_req_error(req, err)) {
1747                 return;
1748         }
1749
1750         state->ret += ret;
1751
1752         while (ret > 0) {
1753                 if (ret < state->vector[0].iov_len) {
1754                         uint8_t *base;
1755                         base = (uint8_t *)state->vector[0].iov_base;
1756                         base += ret;
1757                         state->vector[0].iov_base = base;
1758                         state->vector[0].iov_len -= ret;
1759                         break;
1760                 }
1761                 ret -= state->vector[0].iov_len;
1762                 state->vector += 1;
1763                 state->count -= 1;
1764         }
1765
1766         /*
1767          * there're maybe some empty vectors at the end
1768          * which we need to skip, otherwise we would get
1769          * ret == 0 from the writev() call and return EPIPE
1770          */
1771         while (state->count > 0) {
1772                 if (state->vector[0].iov_len > 0) {
1773                         break;
1774                 }
1775                 state->vector += 1;
1776                 state->count -= 1;
1777         }
1778
1779         if (state->count > 0) {
1780                 /* we have more to read */
1781                 return;
1782         }
1783
1784         tevent_req_done(req);
1785 }
1786
1787 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1788 {
1789         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1790                                         struct tstream_bsd_writev_state);
1791         int ret;
1792
1793         ret = tsocket_simple_int_recv(req, perrno);
1794         if (ret == 0) {
1795                 ret = state->ret;
1796         }
1797
1798         tevent_req_received(req);
1799         return ret;
1800 }
1801
/*
 * State of a stream disconnect request. Nothing has to be remembered,
 * the single member only keeps the struct from being empty.
 */
struct tstream_bsd_disconnect_state {
        void *__dummy;
};
1805
1806 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1807                                                      struct tevent_context *ev,
1808                                                      struct tstream_context *stream)
1809 {
1810         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1811         struct tevent_req *req;
1812         struct tstream_bsd_disconnect_state *state;
1813         int ret;
1814         int err;
1815         bool dummy;
1816
1817         req = tevent_req_create(mem_ctx, &state,
1818                                 struct tstream_bsd_disconnect_state);
1819         if (req == NULL) {
1820                 return NULL;
1821         }
1822
1823         if (bsds->fd == -1) {
1824                 tevent_req_error(req, ENOTCONN);
1825                 goto post;
1826         }
1827
1828         ret = close(bsds->fd);
1829         bsds->fd = -1;
1830         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1831         if (tevent_req_error(req, err)) {
1832                 goto post;
1833         }
1834
1835         tevent_req_done(req);
1836 post:
1837         tevent_req_post(req, ev);
1838         return req;
1839 }
1840
/*
 * Collect the result of a stream disconnect request.
 *
 * Returns whatever tsocket_simple_int_recv() reports for the request;
 * perrno is passed through to it.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
                                      int *perrno)
{
        int result = tsocket_simple_int_recv(req, perrno);

        tevent_req_received(req);
        return result;
}
1851
1852 static const struct tstream_context_ops tstream_bsd_ops = {
1853         .name                   = "bsd",
1854
1855         .pending_bytes          = tstream_bsd_pending_bytes,
1856
1857         .readv_send             = tstream_bsd_readv_send,
1858         .readv_recv             = tstream_bsd_readv_recv,
1859
1860         .writev_send            = tstream_bsd_writev_send,
1861         .writev_recv            = tstream_bsd_writev_recv,
1862
1863         .disconnect_send        = tstream_bsd_disconnect_send,
1864         .disconnect_recv        = tstream_bsd_disconnect_recv,
1865 };
1866
1867 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1868 {
1869         TALLOC_FREE(bsds->fde);
1870         if (bsds->fd != -1) {
1871                 close(bsds->fd);
1872                 bsds->fd = -1;
1873         }
1874         return 0;
1875 }
1876
1877 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1878                                  int fd,
1879                                  struct tstream_context **_stream,
1880                                  const char *location)
1881 {
1882         struct tstream_context *stream;
1883         struct tstream_bsd *bsds;
1884
1885         stream = tstream_context_create(mem_ctx,
1886                                         &tstream_bsd_ops,
1887                                         &bsds,
1888                                         struct tstream_bsd,
1889                                         location);
1890         if (!stream) {
1891                 return -1;
1892         }
1893         ZERO_STRUCTP(bsds);
1894         bsds->fd = fd;
1895         talloc_set_destructor(bsds, tstream_bsd_destructor);
1896
1897         *_stream = stream;
1898         return 0;
1899 }
1900
/*
 * State of an in-progress stream connect.
 *
 * fd     - the socket being connected, or -1 if none is owned
 * fde    - fd event used while waiting for an asynchronous connect
 * stream - stream context slot (not set by the connect code shown here)
 */
struct tstream_bsd_connect_state {
        int fd;
        struct tevent_fd *fde;
        struct tstream_context *stream;
};
1906
1907 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1908 {
1909         TALLOC_FREE(state->fde);
1910         if (state->fd != -1) {
1911                 close(state->fd);
1912                 state->fd = -1;
1913         }
1914
1915         return 0;
1916 }
1917
1918 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
1919                                             struct tevent_fd *fde,
1920                                             uint16_t flags,
1921                                             void *private_data);
1922
1923 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1924                                         struct tevent_context *ev,
1925                                         int sys_errno,
1926                                         const struct tsocket_address *local,
1927                                         const struct tsocket_address *remote)
1928 {
1929         struct tevent_req *req;
1930         struct tstream_bsd_connect_state *state;
1931         struct tsocket_address_bsd *lbsda =
1932                 talloc_get_type_abort(local->private_data,
1933                 struct tsocket_address_bsd);
1934         struct tsocket_address_bsd *rbsda =
1935                 talloc_get_type_abort(remote->private_data,
1936                 struct tsocket_address_bsd);
1937         int ret;
1938         int err;
1939         bool retry;
1940         bool do_bind = false;
1941         bool do_reuseaddr = false;
1942         socklen_t sa_socklen = sizeof(rbsda->u.ss);
1943
1944         req = tevent_req_create(mem_ctx, &state,
1945                                 struct tstream_bsd_connect_state);
1946         if (!req) {
1947                 return NULL;
1948         }
1949         state->fd = -1;
1950         state->fde = NULL;
1951
1952         talloc_set_destructor(state, tstream_bsd_connect_destructor);
1953
1954         /* give the wrappers a chance to report an error */
1955         if (sys_errno != 0) {
1956                 tevent_req_error(req, sys_errno);
1957                 goto post;
1958         }
1959
1960         switch (lbsda->u.sa.sa_family) {
1961         case AF_UNIX:
1962                 if (lbsda->u.un.sun_path[0] != 0) {
1963                         do_reuseaddr = true;
1964                         do_bind = true;
1965                 }
1966                 /*
1967                  * for unix sockets we can't use the size of sockaddr_storage
1968                  * we would get EINVAL
1969                  */
1970                 sa_socklen = sizeof(rbsda->u.un);
1971                 break;
1972         case AF_INET:
1973                 if (lbsda->u.in.sin_port != 0) {
1974                         do_reuseaddr = true;
1975                         do_bind = true;
1976                 }
1977                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1978                         do_bind = true;
1979                 }
1980                 break;
1981 #ifdef HAVE_IPV6
1982         case AF_INET6:
1983                 if (lbsda->u.in6.sin6_port != 0) {
1984                         do_reuseaddr = true;
1985                         do_bind = true;
1986                 }
1987                 if (memcmp(&in6addr_any,
1988                            &lbsda->u.in6.sin6_addr,
1989                            sizeof(in6addr_any)) != 0) {
1990                         do_bind = true;
1991                 }
1992                 break;
1993 #endif
1994         default:
1995                 tevent_req_error(req, EINVAL);
1996                 goto post;
1997         }
1998
1999         state->fd = socket(lbsda->u.sa.sa_family, SOCK_STREAM, 0);
2000         if (state->fd == -1) {
2001                 tevent_req_error(req, errno);
2002                 goto post;
2003         }
2004
2005         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2006         if (state->fd == -1) {
2007                 tevent_req_error(req, errno);
2008                 goto post;
2009         }
2010
2011         if (do_reuseaddr) {
2012                 int val = 1;
2013
2014                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2015                                  (const void *)&val, sizeof(val));
2016                 if (ret == -1) {
2017                         tevent_req_error(req, errno);
2018                         goto post;
2019                 }
2020         }
2021
2022         if (do_bind) {
2023                 ret = bind(state->fd, &lbsda->u.sa, sizeof(lbsda->u.ss));
2024                 if (ret == -1) {
2025                         tevent_req_error(req, errno);
2026                         goto post;
2027                 }
2028         }
2029
2030         ret = connect(state->fd, &rbsda->u.sa, sa_socklen);
2031         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2032         if (retry) {
2033                 /* retry later */
2034                 goto async;
2035         }
2036         if (tevent_req_error(req, err)) {
2037                 goto post;
2038         }
2039
2040         tevent_req_done(req);
2041         goto post;
2042
2043  async:
2044         state->fde = tevent_add_fd(ev, state,
2045                                    state->fd,
2046                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
2047                                    tstream_bsd_connect_fde_handler,
2048                                    req);
2049         if (tevent_req_nomem(state->fde, req)) {
2050                 goto post;
2051         }
2052
2053         return req;
2054
2055  post:
2056         tevent_req_post(req, ev);
2057         return req;
2058 }
2059
2060 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2061                                             struct tevent_fd *fde,
2062                                             uint16_t flags,
2063                                             void *private_data)
2064 {
2065         struct tevent_req *req = talloc_get_type_abort(private_data,
2066                                  struct tevent_req);
2067         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2068                                         struct tstream_bsd_connect_state);
2069         int ret;
2070         int error=0;
2071         socklen_t len = sizeof(error);
2072         int err;
2073         bool retry;
2074
2075         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2076         if (ret == 0) {
2077                 if (error != 0) {
2078                         errno = error;
2079                         ret = -1;
2080                 }
2081         }
2082         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2083         if (retry) {
2084                 /* retry later */
2085                 return;
2086         }
2087         if (tevent_req_error(req, err)) {
2088                 return;
2089         }
2090
2091         tevent_req_done(req);
2092 }
2093
2094 static int tstream_bsd_connect_recv(struct tevent_req *req,
2095                                     int *perrno,
2096                                     TALLOC_CTX *mem_ctx,
2097                                     struct tstream_context **stream,
2098                                     const char *location)
2099 {
2100         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2101                                         struct tstream_bsd_connect_state);
2102         int ret;
2103
2104         ret = tsocket_simple_int_recv(req, perrno);
2105         if (ret == 0) {
2106                 ret = _tstream_bsd_existing_socket(mem_ctx,
2107                                                    state->fd,
2108                                                    stream,
2109                                                    location);
2110                 if (ret == -1) {
2111                         *perrno = errno;
2112                         goto done;
2113                 }
2114                 TALLOC_FREE(state->fde);
2115                 state->fd = -1;
2116         }
2117
2118 done:
2119         tevent_req_received(req);
2120         return ret;
2121 }
2122
2123 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2124                                         struct tevent_context *ev,
2125                                         const struct tsocket_address *local,
2126                                         const struct tsocket_address *remote)
2127 {
2128         struct tsocket_address_bsd *lbsda =
2129                 talloc_get_type_abort(local->private_data,
2130                 struct tsocket_address_bsd);
2131         struct tevent_req *req;
2132         int sys_errno = 0;
2133
2134         switch (lbsda->u.sa.sa_family) {
2135         case AF_INET:
2136                 break;
2137 #ifdef HAVE_IPV6
2138         case AF_INET6:
2139                 break;
2140 #endif
2141         default:
2142                 sys_errno = EINVAL;
2143                 break;
2144         }
2145
2146         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2147
2148         return req;
2149 }
2150
2151 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2152                                    int *perrno,
2153                                    TALLOC_CTX *mem_ctx,
2154                                    struct tstream_context **stream,
2155                                    const char *location)
2156 {
2157         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2158 }
2159
2160 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2161                                         struct tevent_context *ev,
2162                                         const struct tsocket_address *local,
2163                                         const struct tsocket_address *remote)
2164 {
2165         struct tsocket_address_bsd *lbsda =
2166                 talloc_get_type_abort(local->private_data,
2167                 struct tsocket_address_bsd);
2168         struct tevent_req *req;
2169         int sys_errno = 0;
2170
2171         switch (lbsda->u.sa.sa_family) {
2172         case AF_UNIX:
2173                 break;
2174         default:
2175                 sys_errno = EINVAL;
2176                 break;
2177         }
2178
2179         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2180
2181         return req;
2182 }
2183
2184 int _tstream_unix_connect_recv(struct tevent_req *req,
2185                                       int *perrno,
2186                                       TALLOC_CTX *mem_ctx,
2187                                       struct tstream_context **stream,
2188                                       const char *location)
2189 {
2190         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2191 }
2192
2193 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2194                              struct tstream_context **_stream1,
2195                              TALLOC_CTX *mem_ctx2,
2196                              struct tstream_context **_stream2,
2197                              const char *location)
2198 {
2199         int ret;
2200         int fds[2];
2201         int fd1;
2202         int fd2;
2203         struct tstream_context *stream1 = NULL;
2204         struct tstream_context *stream2 = NULL;
2205
2206         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2207         if (ret == -1) {
2208                 return -1;
2209         }
2210         fd1 = fds[0];
2211         fd2 = fds[1];
2212
2213         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2214         if (fd1 == -1) {
2215                 int sys_errno = errno;
2216                 close(fd2);
2217                 errno = sys_errno;
2218                 return -1;
2219         }
2220
2221         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2222         if (fd2 == -1) {
2223                 int sys_errno = errno;
2224                 close(fd1);
2225                 errno = sys_errno;
2226                 return -1;
2227         }
2228
2229         ret = _tstream_bsd_existing_socket(mem_ctx1,
2230                                            fd1,
2231                                            &stream1,
2232                                            location);
2233         if (ret == -1) {
2234                 int sys_errno = errno;
2235                 close(fd1);
2236                 close(fd2);
2237                 errno = sys_errno;
2238                 return -1;
2239         }
2240
2241         ret = _tstream_bsd_existing_socket(mem_ctx2,
2242                                            fd2,
2243                                            &stream2,
2244                                            location);
2245         if (ret == -1) {
2246                 int sys_errno = errno;
2247                 talloc_free(stream1);
2248                 close(fd2);
2249                 errno = sys_errno;
2250                 return -1;
2251         }
2252
2253         *_stream1 = stream1;
2254         *_stream2 = stream2;
2255         return 0;
2256 }
2257