tsocket: rename sa_len => sa_socklen, because sa_len is a macro on some platforms
[samba.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tevent
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Translate the result of a socket call into an errno-style code.
 *
 * ret       - return value of the system call
 * sys_errno - errno value captured right after the call
 * retry     - set to true when the failure is transient and the
 *             operation should simply be tried again later
 *
 * Returns 0 when ret is non-negative, EIO when ret is a negative value
 * other than -1 or when no errno was recorded, and sys_errno otherwise.
 */
static int tsocket_bsd_error_from_errno(int ret,
                                        int sys_errno,
                                        bool *retry)
{
        bool transient;

        *retry = false;

        if (ret >= 0) {
                return 0;
        }

        /* neither a well-formed failure nor a usable errno: generic I/O error */
        if (ret != -1 || sys_errno == 0) {
                return EIO;
        }

        transient = (sys_errno == EINTR) ||
                    (sys_errno == EINPROGRESS) ||
                    (sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
        transient = transient || (sys_errno == EWOULDBLOCK);
#endif

        *retry = transient;
        return sys_errno;
}
72
/*
 * Prepare a freshly created descriptor for use by tsocket.
 *
 * When high_fd is set the descriptor is first moved to a number >= 3 by
 * dup()ing it until the result is large enough; the low descriptors
 * collected on the way are closed again. Afterwards the descriptor is
 * switched to non-blocking mode and, where FD_CLOEXEC is available,
 * marked close-on-exec.
 *
 * Returns the (possibly new) descriptor, or -1 with errno set. On failure
 * the descriptor has been closed.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
        int low_fds[3];
        int num_low = 0;
        int saved_errno = 0;
        int fl;
        int idx;

        if (fd == -1) {
                return -1;
        }

        if (high_fd) {
                /* keep dup()ing until we land on a descriptor >= 3 */
                while (fd < 3) {
                        low_fds[num_low++] = fd;
                        fd = dup(fd);
                        if (fd == -1) {
                                saved_errno = errno;
                                break;
                        }
                }
                for (idx = 0; idx < num_low; idx++) {
                        close(low_fds[idx]);
                }
                if (fd == -1) {
                        /* close() may have clobbered errno, restore it */
                        errno = saved_errno;
                        return -1;
                }
        }

        /* pick the non-blocking flag this platform offers */
#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif
#endif

        fl = fcntl(fd, F_GETFL);
        if (fl == -1) {
                goto fail;
        }
        if (fcntl(fd, F_SETFL, fl | FLAG_TO_SET) == -1) {
                goto fail;
        }

#undef FLAG_TO_SET

#ifdef FD_CLOEXEC
        /* the descriptor must not leak into exec()ed programs */
        fl = fcntl(fd, F_GETFD, 0);
        if (fl < 0) {
                goto fail;
        }
        if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
                goto fail;
        }
#endif
        return fd;

 fail:
        if (fd != -1) {
                saved_errno = errno;
                close(fd);
                errno = saved_errno;
        }
        return -1;
}
149
/*
 * Report how many bytes can be read from fd right now.
 *
 * Returns the byte count reported by FIONREAD when it is non-zero.
 * When nothing is pending the socket's SO_ERROR is consulted: a
 * recorded error is stored in errno and -1 is returned, otherwise the
 * result is 0. A failing ioctl()/getsockopt() also yields -1.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
        int nbytes = 0;
        int so_error = 0;
        socklen_t so_len = sizeof(so_error);
        int rc;

        rc = ioctl(fd, FIONREAD, &nbytes);
        if (rc == -1) {
                return rc;
        }
        if (rc != 0) {
                /* this should not be reached */
                errno = EIO;
                return -1;
        }

        if (nbytes != 0) {
                return nbytes;
        }

        /*
         * Nothing to read: look for a pending socket error. For
         * connected dgram sockets this is how ICMP errors are
         * handed back to the caller.
         */
        rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_len);
        if (rc == -1) {
                return rc;
        }
        if (so_error != 0) {
                errno = so_error;
                return -1;
        }

        return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
/*
 * Private data of a "bsd" tsocket_address.
 *
 * All members of the union overlay the same storage, so the address can
 * be inspected generically (sa), per family (in, in6, un) or as the raw
 * storage (ss).
 */
struct tsocket_address_bsd {
        union {
                struct sockaddr sa;             /* generic view, used to read sa_family */
                struct sockaddr_in in;          /* AF_INET */
#ifdef HAVE_IPV6
                struct sockaddr_in6 in6;        /* AF_INET6 */
#endif
                struct sockaddr_un un;          /* AF_UNIX */
                struct sockaddr_storage ss;     /* raw storage, target of address copies */
        } u;
};
203
204 static int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
205                                               struct sockaddr *sa,
206                                               socklen_t sa_socklen,
207                                               struct tsocket_address **_addr,
208                                               const char *location)
209 {
210         struct tsocket_address *addr;
211         struct tsocket_address_bsd *bsda;
212
213         switch (sa->sa_family) {
214         case AF_UNIX:
215                 if (sa_socklen < sizeof(struct sockaddr_un)) {
216                         errno = EINVAL;
217                         return -1;
218                 }
219                 break;
220         case AF_INET:
221                 if (sa_socklen < sizeof(struct sockaddr_in)) {
222                         errno = EINVAL;
223                         return -1;
224                 }
225                 break;
226 #ifdef HAVE_IPV6
227         case AF_INET6:
228                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
229                         errno = EINVAL;
230                         return -1;
231                 }
232                 break;
233 #endif
234         default:
235                 errno = EAFNOSUPPORT;
236                 return -1;
237         }
238
239         if (sa_socklen > sizeof(struct sockaddr_storage)) {
240                 errno = EINVAL;
241                 return -1;
242         }
243
244         addr = tsocket_address_create(mem_ctx,
245                                       &tsocket_address_bsd_ops,
246                                       &bsda,
247                                       struct tsocket_address_bsd,
248                                       location);
249         if (!addr) {
250                 errno = ENOMEM;
251                 return -1;
252         }
253
254         ZERO_STRUCTP(bsda);
255
256         memcpy(&bsda->u.ss, sa, sa_socklen);
257
258         *_addr = addr;
259         return 0;
260 }
261
262 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
263                                        const char *fam,
264                                        const char *addr,
265                                        uint16_t port,
266                                        struct tsocket_address **_addr,
267                                        const char *location)
268 {
269         struct addrinfo hints;
270         struct addrinfo *result = NULL;
271         char port_str[6];
272         int ret;
273
274         ZERO_STRUCT(hints);
275         /*
276          * we use SOCKET_STREAM here to get just one result
277          * back from getaddrinfo().
278          */
279         hints.ai_socktype = SOCK_STREAM;
280         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
281
282         if (strcasecmp(fam, "ip") == 0) {
283                 hints.ai_family = AF_UNSPEC;
284                 if (!addr) {
285 #ifdef HAVE_IPV6
286                         addr = "::";
287 #else
288                         addr = "0.0.0.0";
289 #endif
290                 }
291         } else if (strcasecmp(fam, "ipv4") == 0) {
292                 hints.ai_family = AF_INET;
293                 if (!addr) {
294                         addr = "0.0.0.0";
295                 }
296 #ifdef HAVE_IPV6
297         } else if (strcasecmp(fam, "ipv6") == 0) {
298                 hints.ai_family = AF_INET6;
299                 if (!addr) {
300                         addr = "::";
301                 }
302 #endif
303         } else {
304                 errno = EAFNOSUPPORT;
305                 return -1;
306         }
307
308         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
309
310         ret = getaddrinfo(addr, port_str, &hints, &result);
311         if (ret != 0) {
312                 switch (ret) {
313                 case EAI_FAIL:
314                         errno = EINVAL;
315                         break;
316                 }
317                 ret = -1;
318                 goto done;
319         }
320
321         if (result->ai_socktype != SOCK_STREAM) {
322                 errno = EINVAL;
323                 ret = -1;
324                 goto done;
325         }
326
327         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
328                                                   result->ai_addr,
329                                                   result->ai_addrlen,
330                                                   _addr,
331                                                   location);
332
333 done:
334         if (result) {
335                 freeaddrinfo(result);
336         }
337         return ret;
338 }
339
340 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
341                                        TALLOC_CTX *mem_ctx)
342 {
343         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
344                                            struct tsocket_address_bsd);
345         char addr_str[INET6_ADDRSTRLEN+1];
346         const char *str;
347
348         if (!bsda) {
349                 errno = EINVAL;
350                 return NULL;
351         }
352
353         switch (bsda->u.sa.sa_family) {
354         case AF_INET:
355                 str = inet_ntop(bsda->u.in.sin_family,
356                                 &bsda->u.in.sin_addr,
357                                 addr_str, sizeof(addr_str));
358                 break;
359 #ifdef HAVE_IPV6
360         case AF_INET6:
361                 str = inet_ntop(bsda->u.in6.sin6_family,
362                                 &bsda->u.in6.sin6_addr,
363                                 addr_str, sizeof(addr_str));
364                 break;
365 #endif
366         default:
367                 errno = EINVAL;
368                 return NULL;
369         }
370
371         if (!str) {
372                 return NULL;
373         }
374
375         return talloc_strdup(mem_ctx, str);
376 }
377
378 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
379 {
380         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
381                                            struct tsocket_address_bsd);
382         uint16_t port = 0;
383
384         if (!bsda) {
385                 errno = EINVAL;
386                 return 0;
387         }
388
389         switch (bsda->u.sa.sa_family) {
390         case AF_INET:
391                 port = ntohs(bsda->u.in.sin_port);
392                 break;
393 #ifdef HAVE_IPV6
394         case AF_INET6:
395                 port = ntohs(bsda->u.in6.sin6_port);
396                 break;
397 #endif
398         default:
399                 errno = EINVAL;
400                 return 0;
401         }
402
403         return port;
404 }
405
406 int tsocket_address_inet_set_port(struct tsocket_address *addr,
407                                   uint16_t port)
408 {
409         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
410                                            struct tsocket_address_bsd);
411
412         if (!bsda) {
413                 errno = EINVAL;
414                 return -1;
415         }
416
417         switch (bsda->u.sa.sa_family) {
418         case AF_INET:
419                 bsda->u.in.sin_port = htons(port);
420                 break;
421 #ifdef HAVE_IPV6
422         case AF_INET6:
423                 bsda->u.in6.sin6_port = htons(port);
424                 break;
425 #endif
426         default:
427                 errno = EINVAL;
428                 return -1;
429         }
430
431         return 0;
432 }
433
434 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
435                                     const char *path,
436                                     struct tsocket_address **_addr,
437                                     const char *location)
438 {
439         struct sockaddr_un un;
440         void *p = &un;
441         int ret;
442
443         if (!path) {
444                 path = "";
445         }
446
447         ZERO_STRUCT(un);
448         un.sun_family = AF_UNIX;
449         strncpy(un.sun_path, path, sizeof(un.sun_path));
450
451         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
452                                                  (struct sockaddr *)p,
453                                                  sizeof(un),
454                                                  _addr,
455                                                  location);
456
457         return ret;
458 }
459
460 char *tsocket_address_unix_path(const struct tsocket_address *addr,
461                                 TALLOC_CTX *mem_ctx)
462 {
463         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
464                                            struct tsocket_address_bsd);
465         const char *str;
466
467         if (!bsda) {
468                 errno = EINVAL;
469                 return NULL;
470         }
471
472         switch (bsda->u.sa.sa_family) {
473         case AF_UNIX:
474                 str = bsda->u.un.sun_path;
475                 break;
476         default:
477                 errno = EINVAL;
478                 return NULL;
479         }
480
481         return talloc_strdup(mem_ctx, str);
482 }
483
484 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
485                                         TALLOC_CTX *mem_ctx)
486 {
487         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
488                                            struct tsocket_address_bsd);
489         char *str;
490         char *addr_str;
491         const char *prefix = NULL;
492         uint16_t port;
493
494         switch (bsda->u.sa.sa_family) {
495         case AF_UNIX:
496                 return talloc_asprintf(mem_ctx, "unix:%s",
497                                        bsda->u.un.sun_path);
498         case AF_INET:
499                 prefix = "ipv4";
500                 break;
501 #ifdef HAVE_IPV6
502         case AF_INET6:
503                 prefix = "ipv6";
504                 break;
505 #endif
506         default:
507                 errno = EINVAL;
508                 return NULL;
509         }
510
511         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
512         if (!addr_str) {
513                 return NULL;
514         }
515
516         port = tsocket_address_inet_port(addr);
517
518         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
519                               prefix, addr_str, port);
520         talloc_free(addr_str);
521
522         return str;
523 }
524
525 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
526                                                          TALLOC_CTX *mem_ctx,
527                                                          const char *location)
528 {
529         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
530                                            struct tsocket_address_bsd);
531         struct tsocket_address *copy;
532         int ret;
533
534         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
535                                                  &bsda->u.sa,
536                                                  sizeof(bsda->u.ss),
537                                                  &copy,
538                                                  location);
539         if (ret != 0) {
540                 return NULL;
541         }
542
543         return copy;
544 }
545
546 static const struct tsocket_address_ops tsocket_address_bsd_ops = {
547         .name           = "bsd",
548         .string         = tsocket_address_bsd_string,
549         .copy           = tsocket_address_bsd_copy,
550 };
551
/* Per-socket state of a bsd datagram context. */
struct tdgram_bsd {
        /* socket descriptor; requests on fd == -1 fail with ENOTCONN */
        int fd;

        /* event context the fd event below was registered on */
        void *event_ptr;
        /* tevent fd event watching the socket */
        struct tevent_fd *fde;

        /* callback (and its argument) invoked when the socket is readable */
        void *readable_private;
        void (*readable_handler)(void *private_data);
        /* callback (and its argument) invoked when the socket is writeable */
        void *writeable_private;
        void (*writeable_handler)(void *private_data);
};
563
564 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
565                                    struct tevent_fd *fde,
566                                    uint16_t flags,
567                                    void *private_data)
568 {
569         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
570                                   struct tdgram_bsd);
571
572         if (flags & TEVENT_FD_WRITE) {
573                 bsds->writeable_handler(bsds->writeable_private);
574                 return;
575         }
576         if (flags & TEVENT_FD_READ) {
577                 if (!bsds->readable_handler) {
578                         TEVENT_FD_NOT_READABLE(bsds->fde);
579                         return;
580                 }
581                 bsds->readable_handler(bsds->readable_private);
582                 return;
583         }
584 }
585
586 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
587                                            struct tevent_context *ev,
588                                            void (*handler)(void *private_data),
589                                            void *private_data)
590 {
591         if (ev == NULL) {
592                 if (handler) {
593                         errno = EINVAL;
594                         return -1;
595                 }
596                 if (!bsds->readable_handler) {
597                         return 0;
598                 }
599                 bsds->readable_handler = NULL;
600                 bsds->readable_private = NULL;
601
602                 return 0;
603         }
604
605         /* read and write must use the same tevent_context */
606         if (bsds->event_ptr != ev) {
607                 if (bsds->readable_handler || bsds->writeable_handler) {
608                         errno = EINVAL;
609                         return -1;
610                 }
611                 bsds->event_ptr = NULL;
612                 TALLOC_FREE(bsds->fde);
613         }
614
615         if (tevent_fd_get_flags(bsds->fde) == 0) {
616                 TALLOC_FREE(bsds->fde);
617
618                 bsds->fde = tevent_add_fd(ev, bsds,
619                                           bsds->fd, TEVENT_FD_READ,
620                                           tdgram_bsd_fde_handler,
621                                           bsds);
622                 if (!bsds->fde) {
623                         errno = ENOMEM;
624                         return -1;
625                 }
626
627                 /* cache the event context we're running on */
628                 bsds->event_ptr = ev;
629         } else if (!bsds->readable_handler) {
630                 TEVENT_FD_READABLE(bsds->fde);
631         }
632
633         bsds->readable_handler = handler;
634         bsds->readable_private = private_data;
635
636         return 0;
637 }
638
639 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
640                                             struct tevent_context *ev,
641                                             void (*handler)(void *private_data),
642                                             void *private_data)
643 {
644         if (ev == NULL) {
645                 if (handler) {
646                         errno = EINVAL;
647                         return -1;
648                 }
649                 if (!bsds->writeable_handler) {
650                         return 0;
651                 }
652                 bsds->writeable_handler = NULL;
653                 bsds->writeable_private = NULL;
654                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
655
656                 return 0;
657         }
658
659         /* read and write must use the same tevent_context */
660         if (bsds->event_ptr != ev) {
661                 if (bsds->readable_handler || bsds->writeable_handler) {
662                         errno = EINVAL;
663                         return -1;
664                 }
665                 bsds->event_ptr = NULL;
666                 TALLOC_FREE(bsds->fde);
667         }
668
669         if (tevent_fd_get_flags(bsds->fde) == 0) {
670                 TALLOC_FREE(bsds->fde);
671
672                 bsds->fde = tevent_add_fd(ev, bsds,
673                                           bsds->fd, TEVENT_FD_WRITE,
674                                           tdgram_bsd_fde_handler,
675                                           bsds);
676                 if (!bsds->fde) {
677                         errno = ENOMEM;
678                         return -1;
679                 }
680
681                 /* cache the event context we're running on */
682                 bsds->event_ptr = ev;
683         } else if (!bsds->writeable_handler) {
684                 TEVENT_FD_WRITEABLE(bsds->fde);
685         }
686
687         bsds->writeable_handler = handler;
688         bsds->writeable_private = private_data;
689
690         return 0;
691 }
692
/* State of one pending recvfrom request. */
struct tdgram_bsd_recvfrom_state {
        /* datagram context the request operates on */
        struct tdgram_context *dgram;

        /* received datagram and its length */
        uint8_t *buf;
        size_t len;
        /* address the datagram was received from */
        struct tsocket_address *src;
};
700
701 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
702 {
703         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
704                                   struct tdgram_bsd);
705
706         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
707
708         return 0;
709 }
710
711 static void tdgram_bsd_recvfrom_handler(void *private_data);
712
713 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
714                                         struct tevent_context *ev,
715                                         struct tdgram_context *dgram)
716 {
717         struct tevent_req *req;
718         struct tdgram_bsd_recvfrom_state *state;
719         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
720         int ret;
721
722         req = tevent_req_create(mem_ctx, &state,
723                                 struct tdgram_bsd_recvfrom_state);
724         if (!req) {
725                 return NULL;
726         }
727
728         state->dgram    = dgram;
729         state->buf      = NULL;
730         state->len      = 0;
731         state->src      = NULL;
732
733         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
734
735         if (bsds->fd == -1) {
736                 tevent_req_error(req, ENOTCONN);
737                 goto post;
738         }
739
740         /*
741          * this is a fast path, not waiting for the
742          * socket to become explicit readable gains
743          * about 10%-20% performance in benchmark tests.
744          */
745         tdgram_bsd_recvfrom_handler(req);
746         if (!tevent_req_is_in_progress(req)) {
747                 goto post;
748         }
749
750         ret = tdgram_bsd_set_readable_handler(bsds, ev,
751                                               tdgram_bsd_recvfrom_handler,
752                                               req);
753         if (ret == -1) {
754                 tevent_req_error(req, errno);
755                 goto post;
756         }
757
758         return req;
759
760  post:
761         tevent_req_post(req, ev);
762         return req;
763 }
764
765 static void tdgram_bsd_recvfrom_handler(void *private_data)
766 {
767         struct tevent_req *req = talloc_get_type_abort(private_data,
768                                  struct tevent_req);
769         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
770                                         struct tdgram_bsd_recvfrom_state);
771         struct tdgram_context *dgram = state->dgram;
772         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
773         struct tsocket_address_bsd *bsda;
774         ssize_t ret;
775         struct sockaddr *sa = NULL;
776         socklen_t sa_socklen = 0;
777         int err;
778         bool retry;
779
780         ret = tsocket_bsd_pending(bsds->fd);
781         if (ret == 0) {
782                 /* retry later */
783                 return;
784         }
785         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
786         if (retry) {
787                 /* retry later */
788                 return;
789         }
790         if (tevent_req_error(req, err)) {
791                 return;
792         }
793
794         state->buf = talloc_array(state, uint8_t, ret);
795         if (tevent_req_nomem(state->buf, req)) {
796                 return;
797         }
798         state->len = ret;
799
800         state->src = tsocket_address_create(state,
801                                             &tsocket_address_bsd_ops,
802                                             &bsda,
803                                             struct tsocket_address_bsd,
804                                             __location__ "bsd_recvfrom");
805         if (tevent_req_nomem(state->src, req)) {
806                 return;
807         }
808
809         ZERO_STRUCTP(bsda);
810
811         sa = &bsda->u.sa;
812         sa_socklen = sizeof(bsda->u.ss);
813         /*
814          * for unix sockets we can't use the size of sockaddr_storage
815          * we would get EINVAL
816          */
817         if (bsda->u.sa.sa_family == AF_UNIX) {
818                 sa_socklen = sizeof(bsda->u.un);
819         }
820
821         ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_socklen);
822         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
823         if (retry) {
824                 /* retry later */
825                 return;
826         }
827         if (tevent_req_error(req, err)) {
828                 return;
829         }
830
831         if (ret != state->len) {
832                 tevent_req_error(req, EIO);
833                 return;
834         }
835
836         tevent_req_done(req);
837 }
838
839 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
840                                         int *perrno,
841                                         TALLOC_CTX *mem_ctx,
842                                         uint8_t **buf,
843                                         struct tsocket_address **src)
844 {
845         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
846                                         struct tdgram_bsd_recvfrom_state);
847         ssize_t ret;
848
849         ret = tsocket_simple_int_recv(req, perrno);
850         if (ret == 0) {
851                 *buf = talloc_move(mem_ctx, &state->buf);
852                 ret = state->len;
853                 if (src) {
854                         *src = talloc_move(mem_ctx, &state->src);
855                 }
856         }
857
858         tevent_req_received(req);
859         return ret;
860 }
861
/* State of one pending sendto request. */
struct tdgram_bsd_sendto_state {
        /* datagram context the request operates on */
        struct tdgram_context *dgram;

        /* caller supplied payload and its length */
        const uint8_t *buf;
        size_t len;
        /* destination address; NULL sends without an explicit address */
        const struct tsocket_address *dst;

        /* result of sendto(), initialised to -1 */
        ssize_t ret;
};
871
872 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
873 {
874         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
875                                   struct tdgram_bsd);
876
877         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
878
879         return 0;
880 }
881
882 static void tdgram_bsd_sendto_handler(void *private_data);
883
884 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
885                                                  struct tevent_context *ev,
886                                                  struct tdgram_context *dgram,
887                                                  const uint8_t *buf,
888                                                  size_t len,
889                                                  const struct tsocket_address *dst)
890 {
891         struct tevent_req *req;
892         struct tdgram_bsd_sendto_state *state;
893         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
894         int ret;
895
896         req = tevent_req_create(mem_ctx, &state,
897                                 struct tdgram_bsd_sendto_state);
898         if (!req) {
899                 return NULL;
900         }
901
902         state->dgram    = dgram;
903         state->buf      = buf;
904         state->len      = len;
905         state->dst      = dst;
906         state->ret      = -1;
907
908         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
909
910         if (bsds->fd == -1) {
911                 tevent_req_error(req, ENOTCONN);
912                 goto post;
913         }
914
915         /*
916          * this is a fast path, not waiting for the
917          * socket to become explicit writeable gains
918          * about 10%-20% performance in benchmark tests.
919          */
920         tdgram_bsd_sendto_handler(req);
921         if (!tevent_req_is_in_progress(req)) {
922                 goto post;
923         }
924
925         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
926                                                tdgram_bsd_sendto_handler,
927                                                req);
928         if (ret == -1) {
929                 tevent_req_error(req, errno);
930                 goto post;
931         }
932
933         return req;
934
935  post:
936         tevent_req_post(req, ev);
937         return req;
938 }
939
940 static void tdgram_bsd_sendto_handler(void *private_data)
941 {
942         struct tevent_req *req = talloc_get_type_abort(private_data,
943                                  struct tevent_req);
944         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
945                                         struct tdgram_bsd_sendto_state);
946         struct tdgram_context *dgram = state->dgram;
947         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
948         struct sockaddr *sa = NULL;
949         socklen_t sa_socklen = 0;
950         ssize_t ret;
951         int err;
952         bool retry;
953
954         if (state->dst) {
955                 struct tsocket_address_bsd *bsda =
956                         talloc_get_type(state->dst->private_data,
957                         struct tsocket_address_bsd);
958
959                 sa = &bsda->u.sa;
960                 sa_socklen = sizeof(bsda->u.ss);
961                 /*
962                  * for unix sockets we can't use the size of sockaddr_storage
963                  * we would get EINVAL
964                  */
965                 if (bsda->u.sa.sa_family == AF_UNIX) {
966                         sa_socklen = sizeof(bsda->u.un);
967                 }
968         }
969
970         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
971         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
972         if (retry) {
973                 /* retry later */
974                 return;
975         }
976         if (tevent_req_error(req, err)) {
977                 return;
978         }
979
980         state->ret = ret;
981
982         tevent_req_done(req);
983 }
984
985 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
986 {
987         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
988                                         struct tdgram_bsd_sendto_state);
989         ssize_t ret;
990
991         ret = tsocket_simple_int_recv(req, perrno);
992         if (ret == 0) {
993                 ret = state->ret;
994         }
995
996         tevent_req_received(req);
997         return ret;
998 }
999
/*
 * Request state of a datagram disconnect.
 *
 * The disconnect request keeps no data of its own; the single placeholder
 * member only exists so that the structure is not empty.
 */
struct tdgram_bsd_disconnect_state {
	uint8_t __dummy;
};
1003
1004 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1005                                                      struct tevent_context *ev,
1006                                                      struct tdgram_context *dgram)
1007 {
1008         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1009         struct tevent_req *req;
1010         struct tdgram_bsd_disconnect_state *state;
1011         int ret;
1012         int err;
1013         bool dummy;
1014
1015         req = tevent_req_create(mem_ctx, &state,
1016                                 struct tdgram_bsd_disconnect_state);
1017         if (req == NULL) {
1018                 return NULL;
1019         }
1020
1021         if (bsds->fd == -1) {
1022                 tevent_req_error(req, ENOTCONN);
1023                 goto post;
1024         }
1025
1026         ret = close(bsds->fd);
1027         bsds->fd = -1;
1028         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1029         if (tevent_req_error(req, err)) {
1030                 goto post;
1031         }
1032
1033         tevent_req_done(req);
1034 post:
1035         tevent_req_post(req, ev);
1036         return req;
1037 }
1038
/*
 * Collect the result of a datagram disconnect request.
 *
 * Passes through the return value of tsocket_simple_int_recv(), which
 * also fills perrno, and marks the request as received.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return result;
}
1049
1050 static const struct tdgram_context_ops tdgram_bsd_ops = {
1051         .name                   = "bsd",
1052
1053         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1054         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1055
1056         .sendto_send            = tdgram_bsd_sendto_send,
1057         .sendto_recv            = tdgram_bsd_sendto_recv,
1058
1059         .disconnect_send        = tdgram_bsd_disconnect_send,
1060         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1061 };
1062
1063 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1064 {
1065         TALLOC_FREE(bsds->fde);
1066         if (bsds->fd != -1) {
1067                 close(bsds->fd);
1068                 bsds->fd = -1;
1069         }
1070         return 0;
1071 }
1072
1073 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1074                                    const struct tsocket_address *remote,
1075                                    bool broadcast,
1076                                    TALLOC_CTX *mem_ctx,
1077                                    struct tdgram_context **_dgram,
1078                                    const char *location)
1079 {
1080         struct tsocket_address_bsd *lbsda =
1081                 talloc_get_type_abort(local->private_data,
1082                 struct tsocket_address_bsd);
1083         struct tsocket_address_bsd *rbsda = NULL;
1084         struct tdgram_context *dgram;
1085         struct tdgram_bsd *bsds;
1086         int fd;
1087         int ret;
1088         bool do_bind = false;
1089         bool do_reuseaddr = false;
1090         socklen_t sa_socklen = sizeof(lbsda->u.ss);
1091
1092         if (remote) {
1093                 rbsda = talloc_get_type_abort(remote->private_data,
1094                         struct tsocket_address_bsd);
1095         }
1096
1097         switch (lbsda->u.sa.sa_family) {
1098         case AF_UNIX:
1099                 if (broadcast) {
1100                         errno = EINVAL;
1101                         return -1;
1102                 }
1103                 if (lbsda->u.un.sun_path[0] != 0) {
1104                         do_reuseaddr = true;
1105                         do_bind = true;
1106                 }
1107                 /*
1108                  * for unix sockets we can't use the size of sockaddr_storage
1109                  * we would get EINVAL
1110                  */
1111                 sa_socklen = sizeof(lbsda->u.un);
1112                 break;
1113         case AF_INET:
1114                 if (lbsda->u.in.sin_port != 0) {
1115                         do_reuseaddr = true;
1116                         do_bind = true;
1117                 }
1118                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1119                         do_bind = true;
1120                 }
1121                 break;
1122 #ifdef HAVE_IPV6
1123         case AF_INET6:
1124                 if (lbsda->u.in6.sin6_port != 0) {
1125                         do_reuseaddr = true;
1126                         do_bind = true;
1127                 }
1128                 if (memcmp(&in6addr_any,
1129                            &lbsda->u.in6.sin6_addr,
1130                            sizeof(in6addr_any)) != 0) {
1131                         do_bind = true;
1132                 }
1133                 break;
1134 #endif
1135         default:
1136                 errno = EINVAL;
1137                 return -1;
1138         }
1139
1140         fd = socket(lbsda->u.sa.sa_family, SOCK_DGRAM, 0);
1141         if (fd < 0) {
1142                 return fd;
1143         }
1144
1145         fd = tsocket_bsd_common_prepare_fd(fd, true);
1146         if (fd < 0) {
1147                 return fd;
1148         }
1149
1150         dgram = tdgram_context_create(mem_ctx,
1151                                       &tdgram_bsd_ops,
1152                                       &bsds,
1153                                       struct tdgram_bsd,
1154                                       location);
1155         if (!dgram) {
1156                 int saved_errno = errno;
1157                 close(fd);
1158                 errno = saved_errno;
1159                 return -1;
1160         }
1161         ZERO_STRUCTP(bsds);
1162         bsds->fd = fd;
1163         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1164
1165         if (broadcast) {
1166                 int val = 1;
1167
1168                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1169                                  (const void *)&val, sizeof(val));
1170                 if (ret == -1) {
1171                         int saved_errno = errno;
1172                         talloc_free(dgram);
1173                         errno = saved_errno;
1174                         return ret;
1175                 }
1176         }
1177
1178         if (do_reuseaddr) {
1179                 int val = 1;
1180
1181                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1182                                  (const void *)&val, sizeof(val));
1183                 if (ret == -1) {
1184                         int saved_errno = errno;
1185                         talloc_free(dgram);
1186                         errno = saved_errno;
1187                         return ret;
1188                 }
1189         }
1190
1191         if (do_bind) {
1192                 ret = bind(fd, &lbsda->u.sa, sa_socklen);
1193                 if (ret == -1) {
1194                         int saved_errno = errno;
1195                         talloc_free(dgram);
1196                         errno = saved_errno;
1197                         return ret;
1198                 }
1199         }
1200
1201         if (rbsda) {
1202                 ret = connect(fd, &rbsda->u.sa, sa_socklen);
1203                 if (ret == -1) {
1204                         int saved_errno = errno;
1205                         talloc_free(dgram);
1206                         errno = saved_errno;
1207                         return ret;
1208                 }
1209         }
1210
1211         *_dgram = dgram;
1212         return 0;
1213 }
1214
1215 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1216                             const struct tsocket_address *remote,
1217                             TALLOC_CTX *mem_ctx,
1218                             struct tdgram_context **dgram,
1219                             const char *location)
1220 {
1221         struct tsocket_address_bsd *lbsda =
1222                 talloc_get_type_abort(local->private_data,
1223                 struct tsocket_address_bsd);
1224         int ret;
1225
1226         switch (lbsda->u.sa.sa_family) {
1227         case AF_INET:
1228                 break;
1229 #ifdef HAVE_IPV6
1230         case AF_INET6:
1231                 break;
1232 #endif
1233         default:
1234                 errno = EINVAL;
1235                 return -1;
1236         }
1237
1238         ret = tdgram_bsd_dgram_socket(local, remote, false,
1239                                       mem_ctx, dgram, location);
1240
1241         return ret;
1242 }
1243
1244 int _tdgram_unix_socket(const struct tsocket_address *local,
1245                         const struct tsocket_address *remote,
1246                         TALLOC_CTX *mem_ctx,
1247                         struct tdgram_context **dgram,
1248                         const char *location)
1249 {
1250         struct tsocket_address_bsd *lbsda =
1251                 talloc_get_type_abort(local->private_data,
1252                 struct tsocket_address_bsd);
1253         int ret;
1254
1255         switch (lbsda->u.sa.sa_family) {
1256         case AF_UNIX:
1257                 break;
1258         default:
1259                 errno = EINVAL;
1260                 return -1;
1261         }
1262
1263         ret = tdgram_bsd_dgram_socket(local, remote, false,
1264                                       mem_ctx, dgram, location);
1265
1266         return ret;
1267 }
1268
/*
 * Private data of a stream context that is backed by a BSD socket.
 */
struct tstream_bsd {
	/* the socket, -1 once it has been closed */
	int fd;

	/* the tevent context the fd event was created on (cached) */
	void *event_ptr;
	/* the fd event watching the socket, NULL if none exists */
	struct tevent_fd *fde;

	/* callback and argument used when the socket is readable */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback and argument used when the socket is writeable */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
1280
1281 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1282                                     struct tevent_fd *fde,
1283                                     uint16_t flags,
1284                                     void *private_data)
1285 {
1286         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1287                                    struct tstream_bsd);
1288
1289         if (flags & TEVENT_FD_WRITE) {
1290                 bsds->writeable_handler(bsds->writeable_private);
1291                 return;
1292         }
1293         if (flags & TEVENT_FD_READ) {
1294                 if (!bsds->readable_handler) {
1295                         if (bsds->writeable_handler) {
1296                                 bsds->writeable_handler(bsds->writeable_private);
1297                                 return;
1298                         }
1299                         TEVENT_FD_NOT_READABLE(bsds->fde);
1300                         return;
1301                 }
1302                 bsds->readable_handler(bsds->readable_private);
1303                 return;
1304         }
1305 }
1306
1307 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1308                                             struct tevent_context *ev,
1309                                             void (*handler)(void *private_data),
1310                                             void *private_data)
1311 {
1312         if (ev == NULL) {
1313                 if (handler) {
1314                         errno = EINVAL;
1315                         return -1;
1316                 }
1317                 if (!bsds->readable_handler) {
1318                         return 0;
1319                 }
1320                 bsds->readable_handler = NULL;
1321                 bsds->readable_private = NULL;
1322
1323                 return 0;
1324         }
1325
1326         /* read and write must use the same tevent_context */
1327         if (bsds->event_ptr != ev) {
1328                 if (bsds->readable_handler || bsds->writeable_handler) {
1329                         errno = EINVAL;
1330                         return -1;
1331                 }
1332                 bsds->event_ptr = NULL;
1333                 TALLOC_FREE(bsds->fde);
1334         }
1335
1336         if (tevent_fd_get_flags(bsds->fde) == 0) {
1337                 TALLOC_FREE(bsds->fde);
1338
1339                 bsds->fde = tevent_add_fd(ev, bsds,
1340                                           bsds->fd, TEVENT_FD_READ,
1341                                           tstream_bsd_fde_handler,
1342                                           bsds);
1343                 if (!bsds->fde) {
1344                         errno = ENOMEM;
1345                         return -1;
1346                 }
1347
1348                 /* cache the event context we're running on */
1349                 bsds->event_ptr = ev;
1350         } else if (!bsds->readable_handler) {
1351                 TEVENT_FD_READABLE(bsds->fde);
1352         }
1353
1354         bsds->readable_handler = handler;
1355         bsds->readable_private = private_data;
1356
1357         return 0;
1358 }
1359
1360 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1361                                              struct tevent_context *ev,
1362                                              void (*handler)(void *private_data),
1363                                              void *private_data)
1364 {
1365         if (ev == NULL) {
1366                 if (handler) {
1367                         errno = EINVAL;
1368                         return -1;
1369                 }
1370                 if (!bsds->writeable_handler) {
1371                         return 0;
1372                 }
1373                 bsds->writeable_handler = NULL;
1374                 bsds->writeable_private = NULL;
1375                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1376
1377                 return 0;
1378         }
1379
1380         /* read and write must use the same tevent_context */
1381         if (bsds->event_ptr != ev) {
1382                 if (bsds->readable_handler || bsds->writeable_handler) {
1383                         errno = EINVAL;
1384                         return -1;
1385                 }
1386                 bsds->event_ptr = NULL;
1387                 TALLOC_FREE(bsds->fde);
1388         }
1389
1390         if (tevent_fd_get_flags(bsds->fde) == 0) {
1391                 TALLOC_FREE(bsds->fde);
1392
1393                 bsds->fde = tevent_add_fd(ev, bsds,
1394                                           bsds->fd,
1395                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1396                                           tstream_bsd_fde_handler,
1397                                           bsds);
1398                 if (!bsds->fde) {
1399                         errno = ENOMEM;
1400                         return -1;
1401                 }
1402
1403                 /* cache the event context we're running on */
1404                 bsds->event_ptr = ev;
1405         } else if (!bsds->writeable_handler) {
1406                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1407                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1408                 tevent_fd_set_flags(bsds->fde, flags);
1409         }
1410
1411         bsds->writeable_handler = handler;
1412         bsds->writeable_private = private_data;
1413
1414         return 0;
1415 }
1416
1417 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1418 {
1419         struct tstream_bsd *bsds = tstream_context_data(stream,
1420                                    struct tstream_bsd);
1421         ssize_t ret;
1422
1423         if (bsds->fd == -1) {
1424                 errno = ENOTCONN;
1425                 return -1;
1426         }
1427
1428         ret = tsocket_bsd_pending(bsds->fd);
1429
1430         return ret;
1431 }
1432
/*
 * Request state of a readv operation on a BSD stream socket.
 */
struct tstream_bsd_readv_state {
	/* the stream the request operates on */
	struct tstream_context *stream;

	/*
	 * private copy of the caller's vector; the handler advances
	 * it while data arrives
	 */
	struct iovec *vector;
	/* number of entries left in vector */
	size_t count;

	/* total number of bytes read so far */
	int ret;
};
1441
1442 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1443 {
1444         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1445                                    struct tstream_bsd);
1446
1447         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1448
1449         return 0;
1450 }
1451
1452 static void tstream_bsd_readv_handler(void *private_data);
1453
1454 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1455                                         struct tevent_context *ev,
1456                                         struct tstream_context *stream,
1457                                         struct iovec *vector,
1458                                         size_t count)
1459 {
1460         struct tevent_req *req;
1461         struct tstream_bsd_readv_state *state;
1462         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1463         int ret;
1464
1465         req = tevent_req_create(mem_ctx, &state,
1466                                 struct tstream_bsd_readv_state);
1467         if (!req) {
1468                 return NULL;
1469         }
1470
1471         state->stream   = stream;
1472         /* we make a copy of the vector so that we can modify it */
1473         state->vector   = talloc_array(state, struct iovec, count);
1474         if (tevent_req_nomem(state->vector, req)) {
1475                 goto post;
1476         }
1477         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1478         state->count    = count;
1479         state->ret      = 0;
1480
1481         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1482
1483         if (bsds->fd == -1) {
1484                 tevent_req_error(req, ENOTCONN);
1485                 goto post;
1486         }
1487
1488         /*
1489          * this is a fast path, not waiting for the
1490          * socket to become explicit readable gains
1491          * about 10%-20% performance in benchmark tests.
1492          */
1493         tstream_bsd_readv_handler(req);
1494         if (!tevent_req_is_in_progress(req)) {
1495                 goto post;
1496         }
1497
1498         ret = tstream_bsd_set_readable_handler(bsds, ev,
1499                                               tstream_bsd_readv_handler,
1500                                               req);
1501         if (ret == -1) {
1502                 tevent_req_error(req, errno);
1503                 goto post;
1504         }
1505
1506         return req;
1507
1508  post:
1509         tevent_req_post(req, ev);
1510         return req;
1511 }
1512
1513 static void tstream_bsd_readv_handler(void *private_data)
1514 {
1515         struct tevent_req *req = talloc_get_type_abort(private_data,
1516                                  struct tevent_req);
1517         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1518                                         struct tstream_bsd_readv_state);
1519         struct tstream_context *stream = state->stream;
1520         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1521         int ret;
1522         int err;
1523         bool retry;
1524
1525         ret = readv(bsds->fd, state->vector, state->count);
1526         if (ret == 0) {
1527                 /* propagate end of file */
1528                 tevent_req_error(req, EPIPE);
1529                 return;
1530         }
1531         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1532         if (retry) {
1533                 /* retry later */
1534                 return;
1535         }
1536         if (tevent_req_error(req, err)) {
1537                 return;
1538         }
1539
1540         state->ret += ret;
1541
1542         while (ret > 0) {
1543                 if (ret < state->vector[0].iov_len) {
1544                         uint8_t *base;
1545                         base = (uint8_t *)state->vector[0].iov_base;
1546                         base += ret;
1547                         state->vector[0].iov_base = base;
1548                         state->vector[0].iov_len -= ret;
1549                         break;
1550                 }
1551                 ret -= state->vector[0].iov_len;
1552                 state->vector += 1;
1553                 state->count -= 1;
1554         }
1555
1556         /*
1557          * there're maybe some empty vectors at the end
1558          * which we need to skip, otherwise we would get
1559          * ret == 0 from the readv() call and return EPIPE
1560          */
1561         while (state->count > 0) {
1562                 if (state->vector[0].iov_len > 0) {
1563                         break;
1564                 }
1565                 state->vector += 1;
1566                 state->count -= 1;
1567         }
1568
1569         if (state->count > 0) {
1570                 /* we have more to read */
1571                 return;
1572         }
1573
1574         tevent_req_done(req);
1575 }
1576
1577 static int tstream_bsd_readv_recv(struct tevent_req *req,
1578                                   int *perrno)
1579 {
1580         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1581                                         struct tstream_bsd_readv_state);
1582         int ret;
1583
1584         ret = tsocket_simple_int_recv(req, perrno);
1585         if (ret == 0) {
1586                 ret = state->ret;
1587         }
1588
1589         tevent_req_received(req);
1590         return ret;
1591 }
1592
/*
 * Request state of a writev operation on a BSD stream socket.
 */
struct tstream_bsd_writev_state {
	/* the stream the request operates on */
	struct tstream_context *stream;

	/*
	 * private copy of the caller's vector; the handler advances
	 * it while data is written
	 */
	struct iovec *vector;
	/* number of entries left in vector */
	size_t count;

	/* total number of bytes written so far */
	int ret;
};
1601
1602 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1603 {
1604         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1605                                   struct tstream_bsd);
1606
1607         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1608
1609         return 0;
1610 }
1611
1612 static void tstream_bsd_writev_handler(void *private_data);
1613
1614 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1615                                                  struct tevent_context *ev,
1616                                                  struct tstream_context *stream,
1617                                                  const struct iovec *vector,
1618                                                  size_t count)
1619 {
1620         struct tevent_req *req;
1621         struct tstream_bsd_writev_state *state;
1622         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1623         int ret;
1624
1625         req = tevent_req_create(mem_ctx, &state,
1626                                 struct tstream_bsd_writev_state);
1627         if (!req) {
1628                 return NULL;
1629         }
1630
1631         state->stream   = stream;
1632         /* we make a copy of the vector so that we can modify it */
1633         state->vector   = talloc_array(state, struct iovec, count);
1634         if (tevent_req_nomem(state->vector, req)) {
1635                 goto post;
1636         }
1637         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1638         state->count    = count;
1639         state->ret      = 0;
1640
1641         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1642
1643         if (bsds->fd == -1) {
1644                 tevent_req_error(req, ENOTCONN);
1645                 goto post;
1646         }
1647
1648         /*
1649          * this is a fast path, not waiting for the
1650          * socket to become explicit writeable gains
1651          * about 10%-20% performance in benchmark tests.
1652          */
1653         tstream_bsd_writev_handler(req);
1654         if (!tevent_req_is_in_progress(req)) {
1655                 goto post;
1656         }
1657
1658         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1659                                                tstream_bsd_writev_handler,
1660                                                req);
1661         if (ret == -1) {
1662                 tevent_req_error(req, errno);
1663                 goto post;
1664         }
1665
1666         return req;
1667
1668  post:
1669         tevent_req_post(req, ev);
1670         return req;
1671 }
1672
1673 static void tstream_bsd_writev_handler(void *private_data)
1674 {
1675         struct tevent_req *req = talloc_get_type_abort(private_data,
1676                                  struct tevent_req);
1677         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1678                                         struct tstream_bsd_writev_state);
1679         struct tstream_context *stream = state->stream;
1680         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1681         ssize_t ret;
1682         int err;
1683         bool retry;
1684
1685         ret = writev(bsds->fd, state->vector, state->count);
1686         if (ret == 0) {
1687                 /* propagate end of file */
1688                 tevent_req_error(req, EPIPE);
1689                 return;
1690         }
1691         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1692         if (retry) {
1693                 /* retry later */
1694                 return;
1695         }
1696         if (tevent_req_error(req, err)) {
1697                 return;
1698         }
1699
1700         state->ret += ret;
1701
1702         while (ret > 0) {
1703                 if (ret < state->vector[0].iov_len) {
1704                         uint8_t *base;
1705                         base = (uint8_t *)state->vector[0].iov_base;
1706                         base += ret;
1707                         state->vector[0].iov_base = base;
1708                         state->vector[0].iov_len -= ret;
1709                         break;
1710                 }
1711                 ret -= state->vector[0].iov_len;
1712                 state->vector += 1;
1713                 state->count -= 1;
1714         }
1715
1716         /*
1717          * there're maybe some empty vectors at the end
1718          * which we need to skip, otherwise we would get
1719          * ret == 0 from the writev() call and return EPIPE
1720          */
1721         while (state->count > 0) {
1722                 if (state->vector[0].iov_len > 0) {
1723                         break;
1724                 }
1725                 state->vector += 1;
1726                 state->count -= 1;
1727         }
1728
1729         if (state->count > 0) {
1730                 /* we have more to read */
1731                 return;
1732         }
1733
1734         tevent_req_done(req);
1735 }
1736
1737 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1738 {
1739         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1740                                         struct tstream_bsd_writev_state);
1741         int ret;
1742
1743         ret = tsocket_simple_int_recv(req, perrno);
1744         if (ret == 0) {
1745                 ret = state->ret;
1746         }
1747
1748         tevent_req_received(req);
1749         return ret;
1750 }
1751
/*
 * Request state of a stream disconnect.
 *
 * The disconnect request keeps no data of its own; the single placeholder
 * member only exists so that the structure is not empty.
 */
struct tstream_bsd_disconnect_state {
	void *__dummy;
};
1755
1756 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1757                                                      struct tevent_context *ev,
1758                                                      struct tstream_context *stream)
1759 {
1760         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1761         struct tevent_req *req;
1762         struct tstream_bsd_disconnect_state *state;
1763         int ret;
1764         int err;
1765         bool dummy;
1766
1767         req = tevent_req_create(mem_ctx, &state,
1768                                 struct tstream_bsd_disconnect_state);
1769         if (req == NULL) {
1770                 return NULL;
1771         }
1772
1773         if (bsds->fd == -1) {
1774                 tevent_req_error(req, ENOTCONN);
1775                 goto post;
1776         }
1777
1778         ret = close(bsds->fd);
1779         bsds->fd = -1;
1780         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1781         if (tevent_req_error(req, err)) {
1782                 goto post;
1783         }
1784
1785         tevent_req_done(req);
1786 post:
1787         tevent_req_post(req, ev);
1788         return req;
1789 }
1790
/*
 * Collect the result of a stream disconnect request.
 *
 * Passes through the return value of tsocket_simple_int_recv(), which
 * also fills perrno, and marks the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return result;
}
1801
1802 static const struct tstream_context_ops tstream_bsd_ops = {
1803         .name                   = "bsd",
1804
1805         .pending_bytes          = tstream_bsd_pending_bytes,
1806
1807         .readv_send             = tstream_bsd_readv_send,
1808         .readv_recv             = tstream_bsd_readv_recv,
1809
1810         .writev_send            = tstream_bsd_writev_send,
1811         .writev_recv            = tstream_bsd_writev_recv,
1812
1813         .disconnect_send        = tstream_bsd_disconnect_send,
1814         .disconnect_recv        = tstream_bsd_disconnect_recv,
1815 };
1816
1817 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1818 {
1819         TALLOC_FREE(bsds->fde);
1820         if (bsds->fd != -1) {
1821                 close(bsds->fd);
1822                 bsds->fd = -1;
1823         }
1824         return 0;
1825 }
1826
1827 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1828                                  int fd,
1829                                  struct tstream_context **_stream,
1830                                  const char *location)
1831 {
1832         struct tstream_context *stream;
1833         struct tstream_bsd *bsds;
1834
1835         stream = tstream_context_create(mem_ctx,
1836                                         &tstream_bsd_ops,
1837                                         &bsds,
1838                                         struct tstream_bsd,
1839                                         location);
1840         if (!stream) {
1841                 return -1;
1842         }
1843         ZERO_STRUCTP(bsds);
1844         bsds->fd = fd;
1845         talloc_set_destructor(bsds, tstream_bsd_destructor);
1846
1847         *_stream = stream;
1848         return 0;
1849 }
1850
/*
 * Per-request state of an asynchronous stream connect.
 */
struct tstream_bsd_connect_state {
        /* socket being connected; -1 when no descriptor is owned */
        int fd;
        /* fd event used while the connect is still in progress */
        struct tevent_fd *fde;
        /* NOTE(review): not referenced by the connect code in this file */
        struct tstream_context *stream;
};
1856
1857 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1858 {
1859         TALLOC_FREE(state->fde);
1860         if (state->fd != -1) {
1861                 close(state->fd);
1862                 state->fd = -1;
1863         }
1864
1865         return 0;
1866 }
1867
1868 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
1869                                             struct tevent_fd *fde,
1870                                             uint16_t flags,
1871                                             void *private_data);
1872
1873 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1874                                         struct tevent_context *ev,
1875                                         int sys_errno,
1876                                         const struct tsocket_address *local,
1877                                         const struct tsocket_address *remote)
1878 {
1879         struct tevent_req *req;
1880         struct tstream_bsd_connect_state *state;
1881         struct tsocket_address_bsd *lbsda =
1882                 talloc_get_type_abort(local->private_data,
1883                 struct tsocket_address_bsd);
1884         struct tsocket_address_bsd *rbsda =
1885                 talloc_get_type_abort(remote->private_data,
1886                 struct tsocket_address_bsd);
1887         int ret;
1888         int err;
1889         bool retry;
1890         bool do_bind = false;
1891         bool do_reuseaddr = false;
1892         socklen_t sa_socklen = sizeof(rbsda->u.ss);
1893
1894         req = tevent_req_create(mem_ctx, &state,
1895                                 struct tstream_bsd_connect_state);
1896         if (!req) {
1897                 return NULL;
1898         }
1899         state->fd = -1;
1900         state->fde = NULL;
1901
1902         talloc_set_destructor(state, tstream_bsd_connect_destructor);
1903
1904         /* give the wrappers a chance to report an error */
1905         if (sys_errno != 0) {
1906                 tevent_req_error(req, sys_errno);
1907                 goto post;
1908         }
1909
1910         switch (lbsda->u.sa.sa_family) {
1911         case AF_UNIX:
1912                 if (lbsda->u.un.sun_path[0] != 0) {
1913                         do_reuseaddr = true;
1914                         do_bind = true;
1915                 }
1916                 /*
1917                  * for unix sockets we can't use the size of sockaddr_storage
1918                  * we would get EINVAL
1919                  */
1920                 sa_socklen = sizeof(rbsda->u.un);
1921                 break;
1922         case AF_INET:
1923                 if (lbsda->u.in.sin_port != 0) {
1924                         do_reuseaddr = true;
1925                         do_bind = true;
1926                 }
1927                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1928                         do_bind = true;
1929                 }
1930                 break;
1931 #ifdef HAVE_IPV6
1932         case AF_INET6:
1933                 if (lbsda->u.in6.sin6_port != 0) {
1934                         do_reuseaddr = true;
1935                         do_bind = true;
1936                 }
1937                 if (memcmp(&in6addr_any,
1938                            &lbsda->u.in6.sin6_addr,
1939                            sizeof(in6addr_any)) != 0) {
1940                         do_bind = true;
1941                 }
1942                 break;
1943 #endif
1944         default:
1945                 tevent_req_error(req, EINVAL);
1946                 goto post;
1947         }
1948
1949         state->fd = socket(lbsda->u.sa.sa_family, SOCK_STREAM, 0);
1950         if (state->fd == -1) {
1951                 tevent_req_error(req, errno);
1952                 goto post;
1953         }
1954
1955         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
1956         if (state->fd == -1) {
1957                 tevent_req_error(req, errno);
1958                 goto post;
1959         }
1960
1961         if (do_reuseaddr) {
1962                 int val = 1;
1963
1964                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
1965                                  (const void *)&val, sizeof(val));
1966                 if (ret == -1) {
1967                         tevent_req_error(req, errno);
1968                         goto post;
1969                 }
1970         }
1971
1972         if (do_bind) {
1973                 ret = bind(state->fd, &lbsda->u.sa, sizeof(lbsda->u.ss));
1974                 if (ret == -1) {
1975                         tevent_req_error(req, errno);
1976                         goto post;
1977                 }
1978         }
1979
1980         ret = connect(state->fd, &rbsda->u.sa, sa_socklen);
1981         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1982         if (retry) {
1983                 /* retry later */
1984                 goto async;
1985         }
1986         if (tevent_req_error(req, err)) {
1987                 goto post;
1988         }
1989
1990         tevent_req_done(req);
1991         goto post;
1992
1993  async:
1994         state->fde = tevent_add_fd(ev, state,
1995                                    state->fd,
1996                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
1997                                    tstream_bsd_connect_fde_handler,
1998                                    req);
1999         if (tevent_req_nomem(state->fde, req)) {
2000                 goto post;
2001         }
2002
2003         return req;
2004
2005  post:
2006         tevent_req_post(req, ev);
2007         return req;
2008 }
2009
2010 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2011                                             struct tevent_fd *fde,
2012                                             uint16_t flags,
2013                                             void *private_data)
2014 {
2015         struct tevent_req *req = talloc_get_type_abort(private_data,
2016                                  struct tevent_req);
2017         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2018                                         struct tstream_bsd_connect_state);
2019         int ret;
2020         int error=0;
2021         socklen_t len = sizeof(error);
2022         int err;
2023         bool retry;
2024
2025         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2026         if (ret == 0) {
2027                 if (error != 0) {
2028                         errno = error;
2029                         ret = -1;
2030                 }
2031         }
2032         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2033         if (retry) {
2034                 /* retry later */
2035                 return;
2036         }
2037         if (tevent_req_error(req, err)) {
2038                 return;
2039         }
2040
2041         tevent_req_done(req);
2042 }
2043
2044 static int tstream_bsd_connect_recv(struct tevent_req *req,
2045                                     int *perrno,
2046                                     TALLOC_CTX *mem_ctx,
2047                                     struct tstream_context **stream,
2048                                     const char *location)
2049 {
2050         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2051                                         struct tstream_bsd_connect_state);
2052         int ret;
2053
2054         ret = tsocket_simple_int_recv(req, perrno);
2055         if (ret == 0) {
2056                 ret = _tstream_bsd_existing_socket(mem_ctx,
2057                                                    state->fd,
2058                                                    stream,
2059                                                    location);
2060                 if (ret == -1) {
2061                         *perrno = errno;
2062                         goto done;
2063                 }
2064                 TALLOC_FREE(state->fde);
2065                 state->fd = -1;
2066         }
2067
2068 done:
2069         tevent_req_received(req);
2070         return ret;
2071 }
2072
2073 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2074                                         struct tevent_context *ev,
2075                                         const struct tsocket_address *local,
2076                                         const struct tsocket_address *remote)
2077 {
2078         struct tsocket_address_bsd *lbsda =
2079                 talloc_get_type_abort(local->private_data,
2080                 struct tsocket_address_bsd);
2081         struct tevent_req *req;
2082         int sys_errno = 0;
2083
2084         switch (lbsda->u.sa.sa_family) {
2085         case AF_INET:
2086                 break;
2087 #ifdef HAVE_IPV6
2088         case AF_INET6:
2089                 break;
2090 #endif
2091         default:
2092                 sys_errno = EINVAL;
2093                 break;
2094         }
2095
2096         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2097
2098         return req;
2099 }
2100
2101 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2102                                    int *perrno,
2103                                    TALLOC_CTX *mem_ctx,
2104                                    struct tstream_context **stream,
2105                                    const char *location)
2106 {
2107         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2108 }
2109
2110 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2111                                         struct tevent_context *ev,
2112                                         const struct tsocket_address *local,
2113                                         const struct tsocket_address *remote)
2114 {
2115         struct tsocket_address_bsd *lbsda =
2116                 talloc_get_type_abort(local->private_data,
2117                 struct tsocket_address_bsd);
2118         struct tevent_req *req;
2119         int sys_errno = 0;
2120
2121         switch (lbsda->u.sa.sa_family) {
2122         case AF_UNIX:
2123                 break;
2124         default:
2125                 sys_errno = EINVAL;
2126                 break;
2127         }
2128
2129         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2130
2131         return req;
2132 }
2133
2134 int _tstream_unix_connect_recv(struct tevent_req *req,
2135                                       int *perrno,
2136                                       TALLOC_CTX *mem_ctx,
2137                                       struct tstream_context **stream,
2138                                       const char *location)
2139 {
2140         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2141 }
2142
2143 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2144                              struct tstream_context **_stream1,
2145                              TALLOC_CTX *mem_ctx2,
2146                              struct tstream_context **_stream2,
2147                              const char *location)
2148 {
2149         int ret;
2150         int fds[2];
2151         int fd1;
2152         int fd2;
2153         struct tstream_context *stream1 = NULL;
2154         struct tstream_context *stream2 = NULL;
2155
2156         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2157         if (ret == -1) {
2158                 return -1;
2159         }
2160         fd1 = fds[0];
2161         fd2 = fds[1];
2162
2163         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2164         if (fd1 == -1) {
2165                 int sys_errno = errno;
2166                 close(fd2);
2167                 errno = sys_errno;
2168                 return -1;
2169         }
2170
2171         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2172         if (fd2 == -1) {
2173                 int sys_errno = errno;
2174                 close(fd1);
2175                 errno = sys_errno;
2176                 return -1;
2177         }
2178
2179         ret = _tstream_bsd_existing_socket(mem_ctx1,
2180                                            fd1,
2181                                            &stream1,
2182                                            location);
2183         if (ret == -1) {
2184                 int sys_errno = errno;
2185                 close(fd1);
2186                 close(fd2);
2187                 errno = sys_errno;
2188                 return -1;
2189         }
2190
2191         ret = _tstream_bsd_existing_socket(mem_ctx2,
2192                                            fd2,
2193                                            &stream2,
2194                                            location);
2195         if (ret == -1) {
2196                 int sys_errno = errno;
2197                 talloc_free(stream1);
2198                 close(fd2);
2199                 errno = sys_errno;
2200                 return -1;
2201         }
2202
2203         *_stream1 = stream1;
2204         *_stream2 = stream2;
2205         return 0;
2206 }
2207