tsocket: add tsocket_address_bsd_sockaddr() and tsocket_address_bsd_from_sockaddr()
[samba.git] / lib / tsocket / tsocket_bsd.c
1 /*
2    Unix SMB/CIFS implementation.
3
4    Copyright (C) Stefan Metzmacher 2009
5
6      ** NOTE! The following LGPL license applies to the tevent
7      ** library. This does NOT imply that all of Samba is released
8      ** under the LGPL
9
10    This library is free software; you can redistribute it and/or
11    modify it under the terms of the GNU Lesser General Public
12    License as published by the Free Software Foundation; either
13    version 3 of the License, or (at your option) any later version.
14
15    This library is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18    Lesser General Public License for more details.
19
20    You should have received a copy of the GNU Lesser General Public
21    License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24 #include "replace.h"
25 #include "system/filesys.h"
26 #include "system/network.h"
27 #include "tsocket.h"
28 #include "tsocket_internal.h"
29
/*
 * Translate the outcome of a socket syscall into an errno style code.
 *
 * ret       - return value of the syscall
 * sys_errno - errno value captured right after the syscall
 * retry     - set to true if the failure is transient (EINTR,
 *             EINPROGRESS, EAGAIN or EWOULDBLOCK), to false otherwise
 *
 * Returns 0 if ret is not negative, EIO if ret is a negative value
 * other than -1 or if sys_errno is 0, and sys_errno in all other cases.
 */
static int tsocket_bsd_error_from_errno(int ret,
					int sys_errno,
					bool *retry)
{
	bool transient;

	*retry = false;

	if (ret >= 0) {
		return 0;
	}

	/* a failure we cannot attribute to a concrete errno value */
	if (ret != -1 || sys_errno == 0) {
		return EIO;
	}

	transient = (sys_errno == EINTR ||
		     sys_errno == EINPROGRESS ||
		     sys_errno == EAGAIN);
#ifdef EWOULDBLOCK
	transient = transient || (sys_errno == EWOULDBLOCK);
#endif

	*retry = transient;
	return sys_errno;
}
72
/*
 * Prepare a descriptor for use by the tsocket code.
 *
 * If high_fd is true the descriptor is moved, via dup(), to a number
 * >= 3; the low numbered descriptors passed through on the way are
 * closed. Afterwards the descriptor is switched to non-blocking mode
 * and, where FD_CLOEXEC is available, marked close-on-exec.
 *
 * Returns the (possibly new) descriptor on success. On failure -1 is
 * returned with errno describing the failing call; unless fd was -1 on
 * entry, the descriptor has been closed by then.
 */
static int tsocket_bsd_common_prepare_fd(int fd, bool high_fd)
{
	int low_fds[3];
	int num_low = 0;
	int saved_errno = 0;
	int fl;
	int idx;

	if (fd == -1) {
		return -1;
	}

	/* first make a fd >= 3 */
	if (high_fd) {
		while (fd < 3) {
			low_fds[num_low++] = fd;
			fd = dup(fd);
			if (fd == -1) {
				saved_errno = errno;
				break;
			}
		}
		/* the low descriptors are no longer needed, even on error */
		for (idx = 0; idx < num_low; idx++) {
			close(low_fds[idx]);
		}
		if (fd == -1) {
			errno = saved_errno;
			return -1;
		}
	}

	/* fd should be nonblocking. */

#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif
#endif

	fl = fcntl(fd, F_GETFL);
	if (fl == -1) {
		goto fail;
	}
	if (fcntl(fd, F_SETFL, fl | FLAG_TO_SET) == -1) {
		goto fail;
	}

#undef FLAG_TO_SET

	/* fd should be closed on exec() */
#ifdef FD_CLOEXEC
	fl = fcntl(fd, F_GETFD, 0);
	if (fl < 0) {
		goto fail;
	}
	if (fcntl(fd, F_SETFD, fl | FD_CLOEXEC) < 0) {
		goto fail;
	}
#endif
	return fd;

 fail:
	/* close() must not clobber the errno of the failed fcntl() */
	if (fd != -1) {
		saved_errno = errno;
		close(fd);
		errno = saved_errno;
	}
	return -1;
}
149
/*
 * Report how much data is waiting to be read on fd.
 *
 * Returns the byte count reported by FIONREAD if it is not zero. If
 * nothing is queued, the pending socket error (SO_ERROR) is checked:
 * 0 is returned when there is none, otherwise -1 with errno set to
 * that error. -1 is also returned, with errno set, if ioctl() or
 * getsockopt() fails.
 */
static ssize_t tsocket_bsd_pending(int fd)
{
	int avail = 0;
	int so_error = 0;
	socklen_t so_error_len = sizeof(so_error);
	int rc;

	rc = ioctl(fd, FIONREAD, &avail);
	if (rc == -1) {
		return -1;
	}
	if (rc != 0) {
		/* this should not be reached */
		errno = EIO;
		return -1;
	}

	if (avail != 0) {
		return avail;
	}

	/*
	 * if no data is available check if the socket is in error state. For
	 * dgram sockets it's the way to return ICMP error messages of
	 * connected sockets to the caller.
	 */
	rc = getsockopt(fd, SOL_SOCKET, SO_ERROR, &so_error, &so_error_len);
	if (rc == -1) {
		return -1;
	}
	if (so_error != 0) {
		errno = so_error;
		return -1;
	}

	return 0;
}
189
190 static const struct tsocket_address_ops tsocket_address_bsd_ops;
191
192 struct tsocket_address_bsd {
193         union {
194                 struct sockaddr sa;
195                 struct sockaddr_in in;
196 #ifdef HAVE_IPV6
197                 struct sockaddr_in6 in6;
198 #endif
199                 struct sockaddr_un un;
200                 struct sockaddr_storage ss;
201         } u;
202 };
203
204 int _tsocket_address_bsd_from_sockaddr(TALLOC_CTX *mem_ctx,
205                                        struct sockaddr *sa,
206                                        size_t sa_socklen,
207                                        struct tsocket_address **_addr,
208                                        const char *location)
209 {
210         struct tsocket_address *addr;
211         struct tsocket_address_bsd *bsda;
212
213         switch (sa->sa_family) {
214         case AF_UNIX:
215                 if (sa_socklen < sizeof(struct sockaddr_un)) {
216                         errno = EINVAL;
217                         return -1;
218                 }
219                 break;
220         case AF_INET:
221                 if (sa_socklen < sizeof(struct sockaddr_in)) {
222                         errno = EINVAL;
223                         return -1;
224                 }
225                 break;
226 #ifdef HAVE_IPV6
227         case AF_INET6:
228                 if (sa_socklen < sizeof(struct sockaddr_in6)) {
229                         errno = EINVAL;
230                         return -1;
231                 }
232                 break;
233 #endif
234         default:
235                 errno = EAFNOSUPPORT;
236                 return -1;
237         }
238
239         if (sa_socklen > sizeof(struct sockaddr_storage)) {
240                 errno = EINVAL;
241                 return -1;
242         }
243
244         addr = tsocket_address_create(mem_ctx,
245                                       &tsocket_address_bsd_ops,
246                                       &bsda,
247                                       struct tsocket_address_bsd,
248                                       location);
249         if (!addr) {
250                 errno = ENOMEM;
251                 return -1;
252         }
253
254         ZERO_STRUCTP(bsda);
255
256         memcpy(&bsda->u.ss, sa, sa_socklen);
257
258         *_addr = addr;
259         return 0;
260 }
261
262 ssize_t tsocket_address_bsd_sockaddr(const struct tsocket_address *addr,
263                                      struct sockaddr *sa,
264                                      size_t sa_socklen)
265 {
266         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
267                                            struct tsocket_address_bsd);
268         ssize_t rlen = 0;
269
270         if (!bsda) {
271                 errno = EINVAL;
272                 return -1;
273         }
274
275         switch (bsda->u.sa.sa_family) {
276         case AF_UNIX:
277                 rlen = sizeof(struct sockaddr_un);
278                 break;
279         case AF_INET:
280                 rlen = sizeof(struct sockaddr_in);
281                 break;
282 #ifdef HAVE_IPV6
283         case AF_INET6:
284                 rlen = sizeof(struct sockaddr_in6);
285                 break;
286 #endif
287         default:
288                 errno = EAFNOSUPPORT;
289                 return -1;
290         }
291
292         if (sa_socklen < rlen) {
293                 errno = EINVAL;
294                 return -1;
295         }
296
297         if (sa_socklen > sizeof(struct sockaddr_storage)) {
298                 memset(sa, 0, sa_socklen);
299                 sa_socklen = sizeof(struct sockaddr_storage);
300         }
301
302         memcpy(sa, &bsda->u.ss, sa_socklen);
303         return rlen;
304 }
305
306 int _tsocket_address_inet_from_strings(TALLOC_CTX *mem_ctx,
307                                        const char *fam,
308                                        const char *addr,
309                                        uint16_t port,
310                                        struct tsocket_address **_addr,
311                                        const char *location)
312 {
313         struct addrinfo hints;
314         struct addrinfo *result = NULL;
315         char port_str[6];
316         int ret;
317
318         ZERO_STRUCT(hints);
319         /*
320          * we use SOCKET_STREAM here to get just one result
321          * back from getaddrinfo().
322          */
323         hints.ai_socktype = SOCK_STREAM;
324         hints.ai_flags = AI_NUMERICHOST | AI_NUMERICSERV;
325
326         if (strcasecmp(fam, "ip") == 0) {
327                 hints.ai_family = AF_UNSPEC;
328                 if (!addr) {
329 #ifdef HAVE_IPV6
330                         addr = "::";
331 #else
332                         addr = "0.0.0.0";
333 #endif
334                 }
335         } else if (strcasecmp(fam, "ipv4") == 0) {
336                 hints.ai_family = AF_INET;
337                 if (!addr) {
338                         addr = "0.0.0.0";
339                 }
340 #ifdef HAVE_IPV6
341         } else if (strcasecmp(fam, "ipv6") == 0) {
342                 hints.ai_family = AF_INET6;
343                 if (!addr) {
344                         addr = "::";
345                 }
346 #endif
347         } else {
348                 errno = EAFNOSUPPORT;
349                 return -1;
350         }
351
352         snprintf(port_str, sizeof(port_str) - 1, "%u", port);
353
354         ret = getaddrinfo(addr, port_str, &hints, &result);
355         if (ret != 0) {
356                 switch (ret) {
357                 case EAI_FAIL:
358                         errno = EINVAL;
359                         break;
360                 }
361                 ret = -1;
362                 goto done;
363         }
364
365         if (result->ai_socktype != SOCK_STREAM) {
366                 errno = EINVAL;
367                 ret = -1;
368                 goto done;
369         }
370
371         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
372                                                   result->ai_addr,
373                                                   result->ai_addrlen,
374                                                   _addr,
375                                                   location);
376
377 done:
378         if (result) {
379                 freeaddrinfo(result);
380         }
381         return ret;
382 }
383
384 char *tsocket_address_inet_addr_string(const struct tsocket_address *addr,
385                                        TALLOC_CTX *mem_ctx)
386 {
387         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
388                                            struct tsocket_address_bsd);
389         char addr_str[INET6_ADDRSTRLEN+1];
390         const char *str;
391
392         if (!bsda) {
393                 errno = EINVAL;
394                 return NULL;
395         }
396
397         switch (bsda->u.sa.sa_family) {
398         case AF_INET:
399                 str = inet_ntop(bsda->u.in.sin_family,
400                                 &bsda->u.in.sin_addr,
401                                 addr_str, sizeof(addr_str));
402                 break;
403 #ifdef HAVE_IPV6
404         case AF_INET6:
405                 str = inet_ntop(bsda->u.in6.sin6_family,
406                                 &bsda->u.in6.sin6_addr,
407                                 addr_str, sizeof(addr_str));
408                 break;
409 #endif
410         default:
411                 errno = EINVAL;
412                 return NULL;
413         }
414
415         if (!str) {
416                 return NULL;
417         }
418
419         return talloc_strdup(mem_ctx, str);
420 }
421
422 uint16_t tsocket_address_inet_port(const struct tsocket_address *addr)
423 {
424         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
425                                            struct tsocket_address_bsd);
426         uint16_t port = 0;
427
428         if (!bsda) {
429                 errno = EINVAL;
430                 return 0;
431         }
432
433         switch (bsda->u.sa.sa_family) {
434         case AF_INET:
435                 port = ntohs(bsda->u.in.sin_port);
436                 break;
437 #ifdef HAVE_IPV6
438         case AF_INET6:
439                 port = ntohs(bsda->u.in6.sin6_port);
440                 break;
441 #endif
442         default:
443                 errno = EINVAL;
444                 return 0;
445         }
446
447         return port;
448 }
449
450 int tsocket_address_inet_set_port(struct tsocket_address *addr,
451                                   uint16_t port)
452 {
453         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
454                                            struct tsocket_address_bsd);
455
456         if (!bsda) {
457                 errno = EINVAL;
458                 return -1;
459         }
460
461         switch (bsda->u.sa.sa_family) {
462         case AF_INET:
463                 bsda->u.in.sin_port = htons(port);
464                 break;
465 #ifdef HAVE_IPV6
466         case AF_INET6:
467                 bsda->u.in6.sin6_port = htons(port);
468                 break;
469 #endif
470         default:
471                 errno = EINVAL;
472                 return -1;
473         }
474
475         return 0;
476 }
477
478 int _tsocket_address_unix_from_path(TALLOC_CTX *mem_ctx,
479                                     const char *path,
480                                     struct tsocket_address **_addr,
481                                     const char *location)
482 {
483         struct sockaddr_un un;
484         void *p = &un;
485         int ret;
486
487         if (!path) {
488                 path = "";
489         }
490
491         if (strlen(path) > sizeof(un.sun_path)-1) {
492                 errno = ENAMETOOLONG;
493                 return -1;
494         }
495
496         ZERO_STRUCT(un);
497         un.sun_family = AF_UNIX;
498         strncpy(un.sun_path, path, sizeof(un.sun_path)-1);
499
500         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
501                                                  (struct sockaddr *)p,
502                                                  sizeof(un),
503                                                  _addr,
504                                                  location);
505
506         return ret;
507 }
508
509 char *tsocket_address_unix_path(const struct tsocket_address *addr,
510                                 TALLOC_CTX *mem_ctx)
511 {
512         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
513                                            struct tsocket_address_bsd);
514         const char *str;
515
516         if (!bsda) {
517                 errno = EINVAL;
518                 return NULL;
519         }
520
521         switch (bsda->u.sa.sa_family) {
522         case AF_UNIX:
523                 str = bsda->u.un.sun_path;
524                 break;
525         default:
526                 errno = EINVAL;
527                 return NULL;
528         }
529
530         return talloc_strdup(mem_ctx, str);
531 }
532
533 static char *tsocket_address_bsd_string(const struct tsocket_address *addr,
534                                         TALLOC_CTX *mem_ctx)
535 {
536         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
537                                            struct tsocket_address_bsd);
538         char *str;
539         char *addr_str;
540         const char *prefix = NULL;
541         uint16_t port;
542
543         switch (bsda->u.sa.sa_family) {
544         case AF_UNIX:
545                 return talloc_asprintf(mem_ctx, "unix:%s",
546                                        bsda->u.un.sun_path);
547         case AF_INET:
548                 prefix = "ipv4";
549                 break;
550 #ifdef HAVE_IPV6
551         case AF_INET6:
552                 prefix = "ipv6";
553                 break;
554 #endif
555         default:
556                 errno = EINVAL;
557                 return NULL;
558         }
559
560         addr_str = tsocket_address_inet_addr_string(addr, mem_ctx);
561         if (!addr_str) {
562                 return NULL;
563         }
564
565         port = tsocket_address_inet_port(addr);
566
567         str = talloc_asprintf(mem_ctx, "%s:%s:%u",
568                               prefix, addr_str, port);
569         talloc_free(addr_str);
570
571         return str;
572 }
573
574 static struct tsocket_address *tsocket_address_bsd_copy(const struct tsocket_address *addr,
575                                                          TALLOC_CTX *mem_ctx,
576                                                          const char *location)
577 {
578         struct tsocket_address_bsd *bsda = talloc_get_type(addr->private_data,
579                                            struct tsocket_address_bsd);
580         struct tsocket_address *copy;
581         int ret;
582
583         ret = _tsocket_address_bsd_from_sockaddr(mem_ctx,
584                                                  &bsda->u.sa,
585                                                  sizeof(bsda->u.ss),
586                                                  &copy,
587                                                  location);
588         if (ret != 0) {
589                 return NULL;
590         }
591
592         return copy;
593 }
594
595 static const struct tsocket_address_ops tsocket_address_bsd_ops = {
596         .name           = "bsd",
597         .string         = tsocket_address_bsd_string,
598         .copy           = tsocket_address_bsd_copy,
599 };
600
/*
 * Per socket state of a bsd datagram context.
 */
struct tdgram_bsd {
	/* socket descriptor; -1 is treated as "not connected" */
	int fd;

	/* tevent context the fd event below was registered on */
	void *event_ptr;
	/* fd event shared by the read and the write direction */
	struct tevent_fd *fde;

	/* callback (and its argument) run when the socket is readable */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback (and its argument) run when the socket is writeable */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
612
613 static void tdgram_bsd_fde_handler(struct tevent_context *ev,
614                                    struct tevent_fd *fde,
615                                    uint16_t flags,
616                                    void *private_data)
617 {
618         struct tdgram_bsd *bsds = talloc_get_type_abort(private_data,
619                                   struct tdgram_bsd);
620
621         if (flags & TEVENT_FD_WRITE) {
622                 bsds->writeable_handler(bsds->writeable_private);
623                 return;
624         }
625         if (flags & TEVENT_FD_READ) {
626                 if (!bsds->readable_handler) {
627                         TEVENT_FD_NOT_READABLE(bsds->fde);
628                         return;
629                 }
630                 bsds->readable_handler(bsds->readable_private);
631                 return;
632         }
633 }
634
635 static int tdgram_bsd_set_readable_handler(struct tdgram_bsd *bsds,
636                                            struct tevent_context *ev,
637                                            void (*handler)(void *private_data),
638                                            void *private_data)
639 {
640         if (ev == NULL) {
641                 if (handler) {
642                         errno = EINVAL;
643                         return -1;
644                 }
645                 if (!bsds->readable_handler) {
646                         return 0;
647                 }
648                 bsds->readable_handler = NULL;
649                 bsds->readable_private = NULL;
650
651                 return 0;
652         }
653
654         /* read and write must use the same tevent_context */
655         if (bsds->event_ptr != ev) {
656                 if (bsds->readable_handler || bsds->writeable_handler) {
657                         errno = EINVAL;
658                         return -1;
659                 }
660                 bsds->event_ptr = NULL;
661                 TALLOC_FREE(bsds->fde);
662         }
663
664         if (tevent_fd_get_flags(bsds->fde) == 0) {
665                 TALLOC_FREE(bsds->fde);
666
667                 bsds->fde = tevent_add_fd(ev, bsds,
668                                           bsds->fd, TEVENT_FD_READ,
669                                           tdgram_bsd_fde_handler,
670                                           bsds);
671                 if (!bsds->fde) {
672                         errno = ENOMEM;
673                         return -1;
674                 }
675
676                 /* cache the event context we're running on */
677                 bsds->event_ptr = ev;
678         } else if (!bsds->readable_handler) {
679                 TEVENT_FD_READABLE(bsds->fde);
680         }
681
682         bsds->readable_handler = handler;
683         bsds->readable_private = private_data;
684
685         return 0;
686 }
687
688 static int tdgram_bsd_set_writeable_handler(struct tdgram_bsd *bsds,
689                                             struct tevent_context *ev,
690                                             void (*handler)(void *private_data),
691                                             void *private_data)
692 {
693         if (ev == NULL) {
694                 if (handler) {
695                         errno = EINVAL;
696                         return -1;
697                 }
698                 if (!bsds->writeable_handler) {
699                         return 0;
700                 }
701                 bsds->writeable_handler = NULL;
702                 bsds->writeable_private = NULL;
703                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
704
705                 return 0;
706         }
707
708         /* read and write must use the same tevent_context */
709         if (bsds->event_ptr != ev) {
710                 if (bsds->readable_handler || bsds->writeable_handler) {
711                         errno = EINVAL;
712                         return -1;
713                 }
714                 bsds->event_ptr = NULL;
715                 TALLOC_FREE(bsds->fde);
716         }
717
718         if (tevent_fd_get_flags(bsds->fde) == 0) {
719                 TALLOC_FREE(bsds->fde);
720
721                 bsds->fde = tevent_add_fd(ev, bsds,
722                                           bsds->fd, TEVENT_FD_WRITE,
723                                           tdgram_bsd_fde_handler,
724                                           bsds);
725                 if (!bsds->fde) {
726                         errno = ENOMEM;
727                         return -1;
728                 }
729
730                 /* cache the event context we're running on */
731                 bsds->event_ptr = ev;
732         } else if (!bsds->writeable_handler) {
733                 TEVENT_FD_WRITEABLE(bsds->fde);
734         }
735
736         bsds->writeable_handler = handler;
737         bsds->writeable_private = private_data;
738
739         return 0;
740 }
741
/*
 * State of one pending recvfrom request.
 */
struct tdgram_bsd_recvfrom_state {
	/* datagram context the request operates on */
	struct tdgram_context *dgram;

	/* receive buffer, allocated by the handler once data is pending */
	uint8_t *buf;
	/* size of buf in bytes */
	size_t len;
	/* address object whose sockaddr is filled in by recvfrom() */
	struct tsocket_address *src;
};
749
750 static int tdgram_bsd_recvfrom_destructor(struct tdgram_bsd_recvfrom_state *state)
751 {
752         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
753                                   struct tdgram_bsd);
754
755         tdgram_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
756
757         return 0;
758 }
759
760 static void tdgram_bsd_recvfrom_handler(void *private_data);
761
762 static struct tevent_req *tdgram_bsd_recvfrom_send(TALLOC_CTX *mem_ctx,
763                                         struct tevent_context *ev,
764                                         struct tdgram_context *dgram)
765 {
766         struct tevent_req *req;
767         struct tdgram_bsd_recvfrom_state *state;
768         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
769         int ret;
770
771         req = tevent_req_create(mem_ctx, &state,
772                                 struct tdgram_bsd_recvfrom_state);
773         if (!req) {
774                 return NULL;
775         }
776
777         state->dgram    = dgram;
778         state->buf      = NULL;
779         state->len      = 0;
780         state->src      = NULL;
781
782         talloc_set_destructor(state, tdgram_bsd_recvfrom_destructor);
783
784         if (bsds->fd == -1) {
785                 tevent_req_error(req, ENOTCONN);
786                 goto post;
787         }
788
789         /*
790          * this is a fast path, not waiting for the
791          * socket to become explicit readable gains
792          * about 10%-20% performance in benchmark tests.
793          */
794         tdgram_bsd_recvfrom_handler(req);
795         if (!tevent_req_is_in_progress(req)) {
796                 goto post;
797         }
798
799         ret = tdgram_bsd_set_readable_handler(bsds, ev,
800                                               tdgram_bsd_recvfrom_handler,
801                                               req);
802         if (ret == -1) {
803                 tevent_req_error(req, errno);
804                 goto post;
805         }
806
807         return req;
808
809  post:
810         tevent_req_post(req, ev);
811         return req;
812 }
813
814 static void tdgram_bsd_recvfrom_handler(void *private_data)
815 {
816         struct tevent_req *req = talloc_get_type_abort(private_data,
817                                  struct tevent_req);
818         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
819                                         struct tdgram_bsd_recvfrom_state);
820         struct tdgram_context *dgram = state->dgram;
821         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
822         struct tsocket_address_bsd *bsda;
823         ssize_t ret;
824         struct sockaddr *sa = NULL;
825         socklen_t sa_socklen = 0;
826         int err;
827         bool retry;
828
829         ret = tsocket_bsd_pending(bsds->fd);
830         if (ret == 0) {
831                 /* retry later */
832                 return;
833         }
834         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
835         if (retry) {
836                 /* retry later */
837                 return;
838         }
839         if (tevent_req_error(req, err)) {
840                 return;
841         }
842
843         state->buf = talloc_array(state, uint8_t, ret);
844         if (tevent_req_nomem(state->buf, req)) {
845                 return;
846         }
847         state->len = ret;
848
849         state->src = tsocket_address_create(state,
850                                             &tsocket_address_bsd_ops,
851                                             &bsda,
852                                             struct tsocket_address_bsd,
853                                             __location__ "bsd_recvfrom");
854         if (tevent_req_nomem(state->src, req)) {
855                 return;
856         }
857
858         ZERO_STRUCTP(bsda);
859
860         sa = &bsda->u.sa;
861         sa_socklen = sizeof(bsda->u.ss);
862         /*
863          * for unix sockets we can't use the size of sockaddr_storage
864          * we would get EINVAL
865          */
866         if (bsda->u.sa.sa_family == AF_UNIX) {
867                 sa_socklen = sizeof(bsda->u.un);
868         }
869
870         ret = recvfrom(bsds->fd, state->buf, state->len, 0, sa, &sa_socklen);
871         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
872         if (retry) {
873                 /* retry later */
874                 return;
875         }
876         if (tevent_req_error(req, err)) {
877                 return;
878         }
879
880         if (ret != state->len) {
881                 tevent_req_error(req, EIO);
882                 return;
883         }
884
885         tevent_req_done(req);
886 }
887
888 static ssize_t tdgram_bsd_recvfrom_recv(struct tevent_req *req,
889                                         int *perrno,
890                                         TALLOC_CTX *mem_ctx,
891                                         uint8_t **buf,
892                                         struct tsocket_address **src)
893 {
894         struct tdgram_bsd_recvfrom_state *state = tevent_req_data(req,
895                                         struct tdgram_bsd_recvfrom_state);
896         ssize_t ret;
897
898         ret = tsocket_simple_int_recv(req, perrno);
899         if (ret == 0) {
900                 *buf = talloc_move(mem_ctx, &state->buf);
901                 ret = state->len;
902                 if (src) {
903                         *src = talloc_move(mem_ctx, &state->src);
904                 }
905         }
906
907         tevent_req_received(req);
908         return ret;
909 }
910
/*
 * State of one pending sendto request.
 */
struct tdgram_bsd_sendto_state {
	/* datagram context the request operates on */
	struct tdgram_context *dgram;

	/* caller supplied payload; the pointer is stored, not copied */
	const uint8_t *buf;
	/* payload length in bytes */
	size_t len;
	/* caller supplied destination address; may be NULL */
	const struct tsocket_address *dst;

	/* result slot, initialised to -1 by tdgram_bsd_sendto_send() */
	ssize_t ret;
};
920
921 static int tdgram_bsd_sendto_destructor(struct tdgram_bsd_sendto_state *state)
922 {
923         struct tdgram_bsd *bsds = tdgram_context_data(state->dgram,
924                                   struct tdgram_bsd);
925
926         tdgram_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
927
928         return 0;
929 }
930
931 static void tdgram_bsd_sendto_handler(void *private_data);
932
933 static struct tevent_req *tdgram_bsd_sendto_send(TALLOC_CTX *mem_ctx,
934                                                  struct tevent_context *ev,
935                                                  struct tdgram_context *dgram,
936                                                  const uint8_t *buf,
937                                                  size_t len,
938                                                  const struct tsocket_address *dst)
939 {
940         struct tevent_req *req;
941         struct tdgram_bsd_sendto_state *state;
942         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
943         int ret;
944
945         req = tevent_req_create(mem_ctx, &state,
946                                 struct tdgram_bsd_sendto_state);
947         if (!req) {
948                 return NULL;
949         }
950
951         state->dgram    = dgram;
952         state->buf      = buf;
953         state->len      = len;
954         state->dst      = dst;
955         state->ret      = -1;
956
957         talloc_set_destructor(state, tdgram_bsd_sendto_destructor);
958
959         if (bsds->fd == -1) {
960                 tevent_req_error(req, ENOTCONN);
961                 goto post;
962         }
963
964         /*
965          * this is a fast path, not waiting for the
966          * socket to become explicit writeable gains
967          * about 10%-20% performance in benchmark tests.
968          */
969         tdgram_bsd_sendto_handler(req);
970         if (!tevent_req_is_in_progress(req)) {
971                 goto post;
972         }
973
974         ret = tdgram_bsd_set_writeable_handler(bsds, ev,
975                                                tdgram_bsd_sendto_handler,
976                                                req);
977         if (ret == -1) {
978                 tevent_req_error(req, errno);
979                 goto post;
980         }
981
982         return req;
983
984  post:
985         tevent_req_post(req, ev);
986         return req;
987 }
988
989 static void tdgram_bsd_sendto_handler(void *private_data)
990 {
991         struct tevent_req *req = talloc_get_type_abort(private_data,
992                                  struct tevent_req);
993         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
994                                         struct tdgram_bsd_sendto_state);
995         struct tdgram_context *dgram = state->dgram;
996         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
997         struct sockaddr *sa = NULL;
998         socklen_t sa_socklen = 0;
999         ssize_t ret;
1000         int err;
1001         bool retry;
1002
1003         if (state->dst) {
1004                 struct tsocket_address_bsd *bsda =
1005                         talloc_get_type(state->dst->private_data,
1006                         struct tsocket_address_bsd);
1007
1008                 sa = &bsda->u.sa;
1009                 sa_socklen = sizeof(bsda->u.ss);
1010                 /*
1011                  * for unix sockets we can't use the size of sockaddr_storage
1012                  * we would get EINVAL
1013                  */
1014                 if (bsda->u.sa.sa_family == AF_UNIX) {
1015                         sa_socklen = sizeof(bsda->u.un);
1016                 }
1017         }
1018
1019         ret = sendto(bsds->fd, state->buf, state->len, 0, sa, sa_socklen);
1020         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1021         if (retry) {
1022                 /* retry later */
1023                 return;
1024         }
1025         if (tevent_req_error(req, err)) {
1026                 return;
1027         }
1028
1029         state->ret = ret;
1030
1031         tevent_req_done(req);
1032 }
1033
1034 static ssize_t tdgram_bsd_sendto_recv(struct tevent_req *req, int *perrno)
1035 {
1036         struct tdgram_bsd_sendto_state *state = tevent_req_data(req,
1037                                         struct tdgram_bsd_sendto_state);
1038         ssize_t ret;
1039
1040         ret = tsocket_simple_int_recv(req, perrno);
1041         if (ret == 0) {
1042                 ret = state->ret;
1043         }
1044
1045         tevent_req_received(req);
1046         return ret;
1047 }
1048
/*
 * State of a tdgram_bsd disconnect request. The visible disconnect code
 * never reads or writes the member; it only gives tevent_req_create()
 * a state type to allocate.
 */
struct tdgram_bsd_disconnect_state {
	uint8_t __dummy;
};
1052
1053 static struct tevent_req *tdgram_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1054                                                      struct tevent_context *ev,
1055                                                      struct tdgram_context *dgram)
1056 {
1057         struct tdgram_bsd *bsds = tdgram_context_data(dgram, struct tdgram_bsd);
1058         struct tevent_req *req;
1059         struct tdgram_bsd_disconnect_state *state;
1060         int ret;
1061         int err;
1062         bool dummy;
1063
1064         req = tevent_req_create(mem_ctx, &state,
1065                                 struct tdgram_bsd_disconnect_state);
1066         if (req == NULL) {
1067                 return NULL;
1068         }
1069
1070         if (bsds->fd == -1) {
1071                 tevent_req_error(req, ENOTCONN);
1072                 goto post;
1073         }
1074
1075         ret = close(bsds->fd);
1076         bsds->fd = -1;
1077         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1078         if (tevent_req_error(req, err)) {
1079                 goto post;
1080         }
1081
1082         tevent_req_done(req);
1083 post:
1084         tevent_req_post(req, ev);
1085         return req;
1086 }
1087
/*
 * Collect the result of a disconnect request.
 *
 * Returns whatever tsocket_simple_int_recv() returns, with perrno
 * passed through to it, and marks the request as received.
 */
static int tdgram_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return result;
}
1098
1099 static const struct tdgram_context_ops tdgram_bsd_ops = {
1100         .name                   = "bsd",
1101
1102         .recvfrom_send          = tdgram_bsd_recvfrom_send,
1103         .recvfrom_recv          = tdgram_bsd_recvfrom_recv,
1104
1105         .sendto_send            = tdgram_bsd_sendto_send,
1106         .sendto_recv            = tdgram_bsd_sendto_recv,
1107
1108         .disconnect_send        = tdgram_bsd_disconnect_send,
1109         .disconnect_recv        = tdgram_bsd_disconnect_recv,
1110 };
1111
1112 static int tdgram_bsd_destructor(struct tdgram_bsd *bsds)
1113 {
1114         TALLOC_FREE(bsds->fde);
1115         if (bsds->fd != -1) {
1116                 close(bsds->fd);
1117                 bsds->fd = -1;
1118         }
1119         return 0;
1120 }
1121
1122 static int tdgram_bsd_dgram_socket(const struct tsocket_address *local,
1123                                    const struct tsocket_address *remote,
1124                                    bool broadcast,
1125                                    TALLOC_CTX *mem_ctx,
1126                                    struct tdgram_context **_dgram,
1127                                    const char *location)
1128 {
1129         struct tsocket_address_bsd *lbsda =
1130                 talloc_get_type_abort(local->private_data,
1131                 struct tsocket_address_bsd);
1132         struct tsocket_address_bsd *rbsda = NULL;
1133         struct tdgram_context *dgram;
1134         struct tdgram_bsd *bsds;
1135         int fd;
1136         int ret;
1137         bool do_bind = false;
1138         bool do_reuseaddr = false;
1139         socklen_t sa_socklen = sizeof(lbsda->u.ss);
1140
1141         if (remote) {
1142                 rbsda = talloc_get_type_abort(remote->private_data,
1143                         struct tsocket_address_bsd);
1144         }
1145
1146         switch (lbsda->u.sa.sa_family) {
1147         case AF_UNIX:
1148                 if (broadcast) {
1149                         errno = EINVAL;
1150                         return -1;
1151                 }
1152                 if (lbsda->u.un.sun_path[0] != 0) {
1153                         do_reuseaddr = true;
1154                         do_bind = true;
1155                 }
1156                 /*
1157                  * for unix sockets we can't use the size of sockaddr_storage
1158                  * we would get EINVAL
1159                  */
1160                 sa_socklen = sizeof(lbsda->u.un);
1161                 break;
1162         case AF_INET:
1163                 if (lbsda->u.in.sin_port != 0) {
1164                         do_reuseaddr = true;
1165                         do_bind = true;
1166                 }
1167                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1168                         do_bind = true;
1169                 }
1170                 break;
1171 #ifdef HAVE_IPV6
1172         case AF_INET6:
1173                 if (lbsda->u.in6.sin6_port != 0) {
1174                         do_reuseaddr = true;
1175                         do_bind = true;
1176                 }
1177                 if (memcmp(&in6addr_any,
1178                            &lbsda->u.in6.sin6_addr,
1179                            sizeof(in6addr_any)) != 0) {
1180                         do_bind = true;
1181                 }
1182                 break;
1183 #endif
1184         default:
1185                 errno = EINVAL;
1186                 return -1;
1187         }
1188
1189         fd = socket(lbsda->u.sa.sa_family, SOCK_DGRAM, 0);
1190         if (fd < 0) {
1191                 return fd;
1192         }
1193
1194         fd = tsocket_bsd_common_prepare_fd(fd, true);
1195         if (fd < 0) {
1196                 return fd;
1197         }
1198
1199         dgram = tdgram_context_create(mem_ctx,
1200                                       &tdgram_bsd_ops,
1201                                       &bsds,
1202                                       struct tdgram_bsd,
1203                                       location);
1204         if (!dgram) {
1205                 int saved_errno = errno;
1206                 close(fd);
1207                 errno = saved_errno;
1208                 return -1;
1209         }
1210         ZERO_STRUCTP(bsds);
1211         bsds->fd = fd;
1212         talloc_set_destructor(bsds, tdgram_bsd_destructor);
1213
1214         if (broadcast) {
1215                 int val = 1;
1216
1217                 ret = setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
1218                                  (const void *)&val, sizeof(val));
1219                 if (ret == -1) {
1220                         int saved_errno = errno;
1221                         talloc_free(dgram);
1222                         errno = saved_errno;
1223                         return ret;
1224                 }
1225         }
1226
1227         if (do_reuseaddr) {
1228                 int val = 1;
1229
1230                 ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
1231                                  (const void *)&val, sizeof(val));
1232                 if (ret == -1) {
1233                         int saved_errno = errno;
1234                         talloc_free(dgram);
1235                         errno = saved_errno;
1236                         return ret;
1237                 }
1238         }
1239
1240         if (do_bind) {
1241                 ret = bind(fd, &lbsda->u.sa, sa_socklen);
1242                 if (ret == -1) {
1243                         int saved_errno = errno;
1244                         talloc_free(dgram);
1245                         errno = saved_errno;
1246                         return ret;
1247                 }
1248         }
1249
1250         if (rbsda) {
1251                 ret = connect(fd, &rbsda->u.sa, sa_socklen);
1252                 if (ret == -1) {
1253                         int saved_errno = errno;
1254                         talloc_free(dgram);
1255                         errno = saved_errno;
1256                         return ret;
1257                 }
1258         }
1259
1260         *_dgram = dgram;
1261         return 0;
1262 }
1263
1264 int _tdgram_inet_udp_socket(const struct tsocket_address *local,
1265                             const struct tsocket_address *remote,
1266                             TALLOC_CTX *mem_ctx,
1267                             struct tdgram_context **dgram,
1268                             const char *location)
1269 {
1270         struct tsocket_address_bsd *lbsda =
1271                 talloc_get_type_abort(local->private_data,
1272                 struct tsocket_address_bsd);
1273         int ret;
1274
1275         switch (lbsda->u.sa.sa_family) {
1276         case AF_INET:
1277                 break;
1278 #ifdef HAVE_IPV6
1279         case AF_INET6:
1280                 break;
1281 #endif
1282         default:
1283                 errno = EINVAL;
1284                 return -1;
1285         }
1286
1287         ret = tdgram_bsd_dgram_socket(local, remote, false,
1288                                       mem_ctx, dgram, location);
1289
1290         return ret;
1291 }
1292
1293 int _tdgram_unix_socket(const struct tsocket_address *local,
1294                         const struct tsocket_address *remote,
1295                         TALLOC_CTX *mem_ctx,
1296                         struct tdgram_context **dgram,
1297                         const char *location)
1298 {
1299         struct tsocket_address_bsd *lbsda =
1300                 talloc_get_type_abort(local->private_data,
1301                 struct tsocket_address_bsd);
1302         int ret;
1303
1304         switch (lbsda->u.sa.sa_family) {
1305         case AF_UNIX:
1306                 break;
1307         default:
1308                 errno = EINVAL;
1309                 return -1;
1310         }
1311
1312         ret = tdgram_bsd_dgram_socket(local, remote, false,
1313                                       mem_ctx, dgram, location);
1314
1315         return ret;
1316 }
1317
/*
 * Private data of a tstream_context that is backed by a BSD socket.
 */
struct tstream_bsd {
	/* socket file descriptor, -1 once it has been closed */
	int fd;

	/* the event context the fd event was added to (cached pointer) */
	void *event_ptr;
	/* fd event that dispatches to the handlers below */
	struct tevent_fd *fde;

	/* callback and its argument for a readable socket */
	void *readable_private;
	void (*readable_handler)(void *private_data);
	/* callback and its argument for a writeable socket */
	void *writeable_private;
	void (*writeable_handler)(void *private_data);
};
1329
1330 static void tstream_bsd_fde_handler(struct tevent_context *ev,
1331                                     struct tevent_fd *fde,
1332                                     uint16_t flags,
1333                                     void *private_data)
1334 {
1335         struct tstream_bsd *bsds = talloc_get_type_abort(private_data,
1336                                    struct tstream_bsd);
1337
1338         if (flags & TEVENT_FD_WRITE) {
1339                 bsds->writeable_handler(bsds->writeable_private);
1340                 return;
1341         }
1342         if (flags & TEVENT_FD_READ) {
1343                 if (!bsds->readable_handler) {
1344                         if (bsds->writeable_handler) {
1345                                 bsds->writeable_handler(bsds->writeable_private);
1346                                 return;
1347                         }
1348                         TEVENT_FD_NOT_READABLE(bsds->fde);
1349                         return;
1350                 }
1351                 bsds->readable_handler(bsds->readable_private);
1352                 return;
1353         }
1354 }
1355
1356 static int tstream_bsd_set_readable_handler(struct tstream_bsd *bsds,
1357                                             struct tevent_context *ev,
1358                                             void (*handler)(void *private_data),
1359                                             void *private_data)
1360 {
1361         if (ev == NULL) {
1362                 if (handler) {
1363                         errno = EINVAL;
1364                         return -1;
1365                 }
1366                 if (!bsds->readable_handler) {
1367                         return 0;
1368                 }
1369                 bsds->readable_handler = NULL;
1370                 bsds->readable_private = NULL;
1371
1372                 return 0;
1373         }
1374
1375         /* read and write must use the same tevent_context */
1376         if (bsds->event_ptr != ev) {
1377                 if (bsds->readable_handler || bsds->writeable_handler) {
1378                         errno = EINVAL;
1379                         return -1;
1380                 }
1381                 bsds->event_ptr = NULL;
1382                 TALLOC_FREE(bsds->fde);
1383         }
1384
1385         if (tevent_fd_get_flags(bsds->fde) == 0) {
1386                 TALLOC_FREE(bsds->fde);
1387
1388                 bsds->fde = tevent_add_fd(ev, bsds,
1389                                           bsds->fd, TEVENT_FD_READ,
1390                                           tstream_bsd_fde_handler,
1391                                           bsds);
1392                 if (!bsds->fde) {
1393                         errno = ENOMEM;
1394                         return -1;
1395                 }
1396
1397                 /* cache the event context we're running on */
1398                 bsds->event_ptr = ev;
1399         } else if (!bsds->readable_handler) {
1400                 TEVENT_FD_READABLE(bsds->fde);
1401         }
1402
1403         bsds->readable_handler = handler;
1404         bsds->readable_private = private_data;
1405
1406         return 0;
1407 }
1408
1409 static int tstream_bsd_set_writeable_handler(struct tstream_bsd *bsds,
1410                                              struct tevent_context *ev,
1411                                              void (*handler)(void *private_data),
1412                                              void *private_data)
1413 {
1414         if (ev == NULL) {
1415                 if (handler) {
1416                         errno = EINVAL;
1417                         return -1;
1418                 }
1419                 if (!bsds->writeable_handler) {
1420                         return 0;
1421                 }
1422                 bsds->writeable_handler = NULL;
1423                 bsds->writeable_private = NULL;
1424                 TEVENT_FD_NOT_WRITEABLE(bsds->fde);
1425
1426                 return 0;
1427         }
1428
1429         /* read and write must use the same tevent_context */
1430         if (bsds->event_ptr != ev) {
1431                 if (bsds->readable_handler || bsds->writeable_handler) {
1432                         errno = EINVAL;
1433                         return -1;
1434                 }
1435                 bsds->event_ptr = NULL;
1436                 TALLOC_FREE(bsds->fde);
1437         }
1438
1439         if (tevent_fd_get_flags(bsds->fde) == 0) {
1440                 TALLOC_FREE(bsds->fde);
1441
1442                 bsds->fde = tevent_add_fd(ev, bsds,
1443                                           bsds->fd,
1444                                           TEVENT_FD_READ | TEVENT_FD_WRITE,
1445                                           tstream_bsd_fde_handler,
1446                                           bsds);
1447                 if (!bsds->fde) {
1448                         errno = ENOMEM;
1449                         return -1;
1450                 }
1451
1452                 /* cache the event context we're running on */
1453                 bsds->event_ptr = ev;
1454         } else if (!bsds->writeable_handler) {
1455                 uint16_t flags = tevent_fd_get_flags(bsds->fde);
1456                 flags |= TEVENT_FD_READ | TEVENT_FD_WRITE;
1457                 tevent_fd_set_flags(bsds->fde, flags);
1458         }
1459
1460         bsds->writeable_handler = handler;
1461         bsds->writeable_private = private_data;
1462
1463         return 0;
1464 }
1465
1466 static ssize_t tstream_bsd_pending_bytes(struct tstream_context *stream)
1467 {
1468         struct tstream_bsd *bsds = tstream_context_data(stream,
1469                                    struct tstream_bsd);
1470         ssize_t ret;
1471
1472         if (bsds->fd == -1) {
1473                 errno = ENOTCONN;
1474                 return -1;
1475         }
1476
1477         ret = tsocket_bsd_pending(bsds->fd);
1478
1479         return ret;
1480 }
1481
/*
 * Per-request state of a tstream_bsd readv operation.
 */
struct tstream_bsd_readv_state {
	/* stream the request operates on */
	struct tstream_context *stream;

	/* private copy of the caller's vector; advanced as data arrives */
	struct iovec *vector;
	/* number of vector elements that are still to be filled */
	size_t count;

	/* total number of bytes read so far */
	int ret;
};
1490
1491 static int tstream_bsd_readv_destructor(struct tstream_bsd_readv_state *state)
1492 {
1493         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1494                                    struct tstream_bsd);
1495
1496         tstream_bsd_set_readable_handler(bsds, NULL, NULL, NULL);
1497
1498         return 0;
1499 }
1500
1501 static void tstream_bsd_readv_handler(void *private_data);
1502
1503 static struct tevent_req *tstream_bsd_readv_send(TALLOC_CTX *mem_ctx,
1504                                         struct tevent_context *ev,
1505                                         struct tstream_context *stream,
1506                                         struct iovec *vector,
1507                                         size_t count)
1508 {
1509         struct tevent_req *req;
1510         struct tstream_bsd_readv_state *state;
1511         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1512         int ret;
1513
1514         req = tevent_req_create(mem_ctx, &state,
1515                                 struct tstream_bsd_readv_state);
1516         if (!req) {
1517                 return NULL;
1518         }
1519
1520         state->stream   = stream;
1521         /* we make a copy of the vector so that we can modify it */
1522         state->vector   = talloc_array(state, struct iovec, count);
1523         if (tevent_req_nomem(state->vector, req)) {
1524                 goto post;
1525         }
1526         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1527         state->count    = count;
1528         state->ret      = 0;
1529
1530         talloc_set_destructor(state, tstream_bsd_readv_destructor);
1531
1532         if (bsds->fd == -1) {
1533                 tevent_req_error(req, ENOTCONN);
1534                 goto post;
1535         }
1536
1537         /*
1538          * this is a fast path, not waiting for the
1539          * socket to become explicit readable gains
1540          * about 10%-20% performance in benchmark tests.
1541          */
1542         tstream_bsd_readv_handler(req);
1543         if (!tevent_req_is_in_progress(req)) {
1544                 goto post;
1545         }
1546
1547         ret = tstream_bsd_set_readable_handler(bsds, ev,
1548                                               tstream_bsd_readv_handler,
1549                                               req);
1550         if (ret == -1) {
1551                 tevent_req_error(req, errno);
1552                 goto post;
1553         }
1554
1555         return req;
1556
1557  post:
1558         tevent_req_post(req, ev);
1559         return req;
1560 }
1561
1562 static void tstream_bsd_readv_handler(void *private_data)
1563 {
1564         struct tevent_req *req = talloc_get_type_abort(private_data,
1565                                  struct tevent_req);
1566         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1567                                         struct tstream_bsd_readv_state);
1568         struct tstream_context *stream = state->stream;
1569         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1570         int ret;
1571         int err;
1572         bool retry;
1573
1574         ret = readv(bsds->fd, state->vector, state->count);
1575         if (ret == 0) {
1576                 /* propagate end of file */
1577                 tevent_req_error(req, EPIPE);
1578                 return;
1579         }
1580         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1581         if (retry) {
1582                 /* retry later */
1583                 return;
1584         }
1585         if (tevent_req_error(req, err)) {
1586                 return;
1587         }
1588
1589         state->ret += ret;
1590
1591         while (ret > 0) {
1592                 if (ret < state->vector[0].iov_len) {
1593                         uint8_t *base;
1594                         base = (uint8_t *)state->vector[0].iov_base;
1595                         base += ret;
1596                         state->vector[0].iov_base = base;
1597                         state->vector[0].iov_len -= ret;
1598                         break;
1599                 }
1600                 ret -= state->vector[0].iov_len;
1601                 state->vector += 1;
1602                 state->count -= 1;
1603         }
1604
1605         /*
1606          * there're maybe some empty vectors at the end
1607          * which we need to skip, otherwise we would get
1608          * ret == 0 from the readv() call and return EPIPE
1609          */
1610         while (state->count > 0) {
1611                 if (state->vector[0].iov_len > 0) {
1612                         break;
1613                 }
1614                 state->vector += 1;
1615                 state->count -= 1;
1616         }
1617
1618         if (state->count > 0) {
1619                 /* we have more to read */
1620                 return;
1621         }
1622
1623         tevent_req_done(req);
1624 }
1625
1626 static int tstream_bsd_readv_recv(struct tevent_req *req,
1627                                   int *perrno)
1628 {
1629         struct tstream_bsd_readv_state *state = tevent_req_data(req,
1630                                         struct tstream_bsd_readv_state);
1631         int ret;
1632
1633         ret = tsocket_simple_int_recv(req, perrno);
1634         if (ret == 0) {
1635                 ret = state->ret;
1636         }
1637
1638         tevent_req_received(req);
1639         return ret;
1640 }
1641
/*
 * Per-request state of a tstream_bsd writev operation.
 */
struct tstream_bsd_writev_state {
	/* stream the request operates on */
	struct tstream_context *stream;

	/* private copy of the caller's vector; advanced as data is sent */
	struct iovec *vector;
	/* number of vector elements that are still to be written */
	size_t count;

	/* total number of bytes written so far */
	int ret;
};
1650
1651 static int tstream_bsd_writev_destructor(struct tstream_bsd_writev_state *state)
1652 {
1653         struct tstream_bsd *bsds = tstream_context_data(state->stream,
1654                                   struct tstream_bsd);
1655
1656         tstream_bsd_set_writeable_handler(bsds, NULL, NULL, NULL);
1657
1658         return 0;
1659 }
1660
1661 static void tstream_bsd_writev_handler(void *private_data);
1662
1663 static struct tevent_req *tstream_bsd_writev_send(TALLOC_CTX *mem_ctx,
1664                                                  struct tevent_context *ev,
1665                                                  struct tstream_context *stream,
1666                                                  const struct iovec *vector,
1667                                                  size_t count)
1668 {
1669         struct tevent_req *req;
1670         struct tstream_bsd_writev_state *state;
1671         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1672         int ret;
1673
1674         req = tevent_req_create(mem_ctx, &state,
1675                                 struct tstream_bsd_writev_state);
1676         if (!req) {
1677                 return NULL;
1678         }
1679
1680         state->stream   = stream;
1681         /* we make a copy of the vector so that we can modify it */
1682         state->vector   = talloc_array(state, struct iovec, count);
1683         if (tevent_req_nomem(state->vector, req)) {
1684                 goto post;
1685         }
1686         memcpy(state->vector, vector, sizeof(struct iovec)*count);
1687         state->count    = count;
1688         state->ret      = 0;
1689
1690         talloc_set_destructor(state, tstream_bsd_writev_destructor);
1691
1692         if (bsds->fd == -1) {
1693                 tevent_req_error(req, ENOTCONN);
1694                 goto post;
1695         }
1696
1697         /*
1698          * this is a fast path, not waiting for the
1699          * socket to become explicit writeable gains
1700          * about 10%-20% performance in benchmark tests.
1701          */
1702         tstream_bsd_writev_handler(req);
1703         if (!tevent_req_is_in_progress(req)) {
1704                 goto post;
1705         }
1706
1707         ret = tstream_bsd_set_writeable_handler(bsds, ev,
1708                                                tstream_bsd_writev_handler,
1709                                                req);
1710         if (ret == -1) {
1711                 tevent_req_error(req, errno);
1712                 goto post;
1713         }
1714
1715         return req;
1716
1717  post:
1718         tevent_req_post(req, ev);
1719         return req;
1720 }
1721
1722 static void tstream_bsd_writev_handler(void *private_data)
1723 {
1724         struct tevent_req *req = talloc_get_type_abort(private_data,
1725                                  struct tevent_req);
1726         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1727                                         struct tstream_bsd_writev_state);
1728         struct tstream_context *stream = state->stream;
1729         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1730         ssize_t ret;
1731         int err;
1732         bool retry;
1733
1734         ret = writev(bsds->fd, state->vector, state->count);
1735         if (ret == 0) {
1736                 /* propagate end of file */
1737                 tevent_req_error(req, EPIPE);
1738                 return;
1739         }
1740         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
1741         if (retry) {
1742                 /* retry later */
1743                 return;
1744         }
1745         if (tevent_req_error(req, err)) {
1746                 return;
1747         }
1748
1749         state->ret += ret;
1750
1751         while (ret > 0) {
1752                 if (ret < state->vector[0].iov_len) {
1753                         uint8_t *base;
1754                         base = (uint8_t *)state->vector[0].iov_base;
1755                         base += ret;
1756                         state->vector[0].iov_base = base;
1757                         state->vector[0].iov_len -= ret;
1758                         break;
1759                 }
1760                 ret -= state->vector[0].iov_len;
1761                 state->vector += 1;
1762                 state->count -= 1;
1763         }
1764
1765         /*
1766          * there're maybe some empty vectors at the end
1767          * which we need to skip, otherwise we would get
1768          * ret == 0 from the writev() call and return EPIPE
1769          */
1770         while (state->count > 0) {
1771                 if (state->vector[0].iov_len > 0) {
1772                         break;
1773                 }
1774                 state->vector += 1;
1775                 state->count -= 1;
1776         }
1777
1778         if (state->count > 0) {
1779                 /* we have more to read */
1780                 return;
1781         }
1782
1783         tevent_req_done(req);
1784 }
1785
1786 static int tstream_bsd_writev_recv(struct tevent_req *req, int *perrno)
1787 {
1788         struct tstream_bsd_writev_state *state = tevent_req_data(req,
1789                                         struct tstream_bsd_writev_state);
1790         int ret;
1791
1792         ret = tsocket_simple_int_recv(req, perrno);
1793         if (ret == 0) {
1794                 ret = state->ret;
1795         }
1796
1797         tevent_req_received(req);
1798         return ret;
1799 }
1800
/*
 * State of a tstream_bsd disconnect request. The visible disconnect
 * code never reads or writes the member; it only gives
 * tevent_req_create() a state type to allocate.
 */
struct tstream_bsd_disconnect_state {
	void *__dummy;
};
1804
1805 static struct tevent_req *tstream_bsd_disconnect_send(TALLOC_CTX *mem_ctx,
1806                                                      struct tevent_context *ev,
1807                                                      struct tstream_context *stream)
1808 {
1809         struct tstream_bsd *bsds = tstream_context_data(stream, struct tstream_bsd);
1810         struct tevent_req *req;
1811         struct tstream_bsd_disconnect_state *state;
1812         int ret;
1813         int err;
1814         bool dummy;
1815
1816         req = tevent_req_create(mem_ctx, &state,
1817                                 struct tstream_bsd_disconnect_state);
1818         if (req == NULL) {
1819                 return NULL;
1820         }
1821
1822         if (bsds->fd == -1) {
1823                 tevent_req_error(req, ENOTCONN);
1824                 goto post;
1825         }
1826
1827         ret = close(bsds->fd);
1828         bsds->fd = -1;
1829         err = tsocket_bsd_error_from_errno(ret, errno, &dummy);
1830         if (tevent_req_error(req, err)) {
1831                 goto post;
1832         }
1833
1834         tevent_req_done(req);
1835 post:
1836         tevent_req_post(req, ev);
1837         return req;
1838 }
1839
/*
 * Collect the result of a disconnect request.
 *
 * Returns whatever tsocket_simple_int_recv() returns, with perrno
 * passed through to it, and marks the request as received.
 */
static int tstream_bsd_disconnect_recv(struct tevent_req *req,
				      int *perrno)
{
	int result = tsocket_simple_int_recv(req, perrno);

	tevent_req_received(req);
	return result;
}
1850
1851 static const struct tstream_context_ops tstream_bsd_ops = {
1852         .name                   = "bsd",
1853
1854         .pending_bytes          = tstream_bsd_pending_bytes,
1855
1856         .readv_send             = tstream_bsd_readv_send,
1857         .readv_recv             = tstream_bsd_readv_recv,
1858
1859         .writev_send            = tstream_bsd_writev_send,
1860         .writev_recv            = tstream_bsd_writev_recv,
1861
1862         .disconnect_send        = tstream_bsd_disconnect_send,
1863         .disconnect_recv        = tstream_bsd_disconnect_recv,
1864 };
1865
1866 static int tstream_bsd_destructor(struct tstream_bsd *bsds)
1867 {
1868         TALLOC_FREE(bsds->fde);
1869         if (bsds->fd != -1) {
1870                 close(bsds->fd);
1871                 bsds->fd = -1;
1872         }
1873         return 0;
1874 }
1875
1876 int _tstream_bsd_existing_socket(TALLOC_CTX *mem_ctx,
1877                                  int fd,
1878                                  struct tstream_context **_stream,
1879                                  const char *location)
1880 {
1881         struct tstream_context *stream;
1882         struct tstream_bsd *bsds;
1883
1884         stream = tstream_context_create(mem_ctx,
1885                                         &tstream_bsd_ops,
1886                                         &bsds,
1887                                         struct tstream_bsd,
1888                                         location);
1889         if (!stream) {
1890                 return -1;
1891         }
1892         ZERO_STRUCTP(bsds);
1893         bsds->fd = fd;
1894         talloc_set_destructor(bsds, tstream_bsd_destructor);
1895
1896         *_stream = stream;
1897         return 0;
1898 }
1899
/*
 * Per-request state of an asynchronous connect.
 *
 * fd     - socket being connected; -1 while none is open and again once
 *          ownership was handed over to the resulting stream
 * fde    - fd event used while the connect is still in progress
 * stream - not referenced by the connect code in this file
 */
struct tstream_bsd_connect_state {
        int fd;
        struct tevent_fd *fde;
        struct tstream_context *stream;
};
1905
1906 static int tstream_bsd_connect_destructor(struct tstream_bsd_connect_state *state)
1907 {
1908         TALLOC_FREE(state->fde);
1909         if (state->fd != -1) {
1910                 close(state->fd);
1911                 state->fd = -1;
1912         }
1913
1914         return 0;
1915 }
1916
/* fd event callback for a pending connect, defined after the send function */
static void tstream_bsd_connect_fde_handler(
                                struct tevent_context *ev,
                                struct tevent_fd *fde,
                                uint16_t flags,
                                void *private_data);
1921
1922 static struct tevent_req * tstream_bsd_connect_send(TALLOC_CTX *mem_ctx,
1923                                         struct tevent_context *ev,
1924                                         int sys_errno,
1925                                         const struct tsocket_address *local,
1926                                         const struct tsocket_address *remote)
1927 {
1928         struct tevent_req *req;
1929         struct tstream_bsd_connect_state *state;
1930         struct tsocket_address_bsd *lbsda =
1931                 talloc_get_type_abort(local->private_data,
1932                 struct tsocket_address_bsd);
1933         struct tsocket_address_bsd *rbsda =
1934                 talloc_get_type_abort(remote->private_data,
1935                 struct tsocket_address_bsd);
1936         int ret;
1937         int err;
1938         bool retry;
1939         bool do_bind = false;
1940         bool do_reuseaddr = false;
1941         socklen_t sa_socklen = sizeof(rbsda->u.ss);
1942
1943         req = tevent_req_create(mem_ctx, &state,
1944                                 struct tstream_bsd_connect_state);
1945         if (!req) {
1946                 return NULL;
1947         }
1948         state->fd = -1;
1949         state->fde = NULL;
1950
1951         talloc_set_destructor(state, tstream_bsd_connect_destructor);
1952
1953         /* give the wrappers a chance to report an error */
1954         if (sys_errno != 0) {
1955                 tevent_req_error(req, sys_errno);
1956                 goto post;
1957         }
1958
1959         switch (lbsda->u.sa.sa_family) {
1960         case AF_UNIX:
1961                 if (lbsda->u.un.sun_path[0] != 0) {
1962                         do_reuseaddr = true;
1963                         do_bind = true;
1964                 }
1965                 /*
1966                  * for unix sockets we can't use the size of sockaddr_storage
1967                  * we would get EINVAL
1968                  */
1969                 sa_socklen = sizeof(rbsda->u.un);
1970                 break;
1971         case AF_INET:
1972                 if (lbsda->u.in.sin_port != 0) {
1973                         do_reuseaddr = true;
1974                         do_bind = true;
1975                 }
1976                 if (lbsda->u.in.sin_addr.s_addr == INADDR_ANY) {
1977                         do_bind = true;
1978                 }
1979                 break;
1980 #ifdef HAVE_IPV6
1981         case AF_INET6:
1982                 if (lbsda->u.in6.sin6_port != 0) {
1983                         do_reuseaddr = true;
1984                         do_bind = true;
1985                 }
1986                 if (memcmp(&in6addr_any,
1987                            &lbsda->u.in6.sin6_addr,
1988                            sizeof(in6addr_any)) != 0) {
1989                         do_bind = true;
1990                 }
1991                 break;
1992 #endif
1993         default:
1994                 tevent_req_error(req, EINVAL);
1995                 goto post;
1996         }
1997
1998         state->fd = socket(lbsda->u.sa.sa_family, SOCK_STREAM, 0);
1999         if (state->fd == -1) {
2000                 tevent_req_error(req, errno);
2001                 goto post;
2002         }
2003
2004         state->fd = tsocket_bsd_common_prepare_fd(state->fd, true);
2005         if (state->fd == -1) {
2006                 tevent_req_error(req, errno);
2007                 goto post;
2008         }
2009
2010         if (do_reuseaddr) {
2011                 int val = 1;
2012
2013                 ret = setsockopt(state->fd, SOL_SOCKET, SO_REUSEADDR,
2014                                  (const void *)&val, sizeof(val));
2015                 if (ret == -1) {
2016                         tevent_req_error(req, errno);
2017                         goto post;
2018                 }
2019         }
2020
2021         if (do_bind) {
2022                 ret = bind(state->fd, &lbsda->u.sa, sizeof(lbsda->u.ss));
2023                 if (ret == -1) {
2024                         tevent_req_error(req, errno);
2025                         goto post;
2026                 }
2027         }
2028
2029         ret = connect(state->fd, &rbsda->u.sa, sa_socklen);
2030         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2031         if (retry) {
2032                 /* retry later */
2033                 goto async;
2034         }
2035         if (tevent_req_error(req, err)) {
2036                 goto post;
2037         }
2038
2039         tevent_req_done(req);
2040         goto post;
2041
2042  async:
2043         state->fde = tevent_add_fd(ev, state,
2044                                    state->fd,
2045                                    TEVENT_FD_READ | TEVENT_FD_WRITE,
2046                                    tstream_bsd_connect_fde_handler,
2047                                    req);
2048         if (tevent_req_nomem(state->fde, req)) {
2049                 goto post;
2050         }
2051
2052         return req;
2053
2054  post:
2055         tevent_req_post(req, ev);
2056         return req;
2057 }
2058
2059 static void tstream_bsd_connect_fde_handler(struct tevent_context *ev,
2060                                             struct tevent_fd *fde,
2061                                             uint16_t flags,
2062                                             void *private_data)
2063 {
2064         struct tevent_req *req = talloc_get_type_abort(private_data,
2065                                  struct tevent_req);
2066         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2067                                         struct tstream_bsd_connect_state);
2068         int ret;
2069         int error=0;
2070         socklen_t len = sizeof(error);
2071         int err;
2072         bool retry;
2073
2074         ret = getsockopt(state->fd, SOL_SOCKET, SO_ERROR, &error, &len);
2075         if (ret == 0) {
2076                 if (error != 0) {
2077                         errno = error;
2078                         ret = -1;
2079                 }
2080         }
2081         err = tsocket_bsd_error_from_errno(ret, errno, &retry);
2082         if (retry) {
2083                 /* retry later */
2084                 return;
2085         }
2086         if (tevent_req_error(req, err)) {
2087                 return;
2088         }
2089
2090         tevent_req_done(req);
2091 }
2092
2093 static int tstream_bsd_connect_recv(struct tevent_req *req,
2094                                     int *perrno,
2095                                     TALLOC_CTX *mem_ctx,
2096                                     struct tstream_context **stream,
2097                                     const char *location)
2098 {
2099         struct tstream_bsd_connect_state *state = tevent_req_data(req,
2100                                         struct tstream_bsd_connect_state);
2101         int ret;
2102
2103         ret = tsocket_simple_int_recv(req, perrno);
2104         if (ret == 0) {
2105                 ret = _tstream_bsd_existing_socket(mem_ctx,
2106                                                    state->fd,
2107                                                    stream,
2108                                                    location);
2109                 if (ret == -1) {
2110                         *perrno = errno;
2111                         goto done;
2112                 }
2113                 TALLOC_FREE(state->fde);
2114                 state->fd = -1;
2115         }
2116
2117 done:
2118         tevent_req_received(req);
2119         return ret;
2120 }
2121
2122 struct tevent_req * tstream_inet_tcp_connect_send(TALLOC_CTX *mem_ctx,
2123                                         struct tevent_context *ev,
2124                                         const struct tsocket_address *local,
2125                                         const struct tsocket_address *remote)
2126 {
2127         struct tsocket_address_bsd *lbsda =
2128                 talloc_get_type_abort(local->private_data,
2129                 struct tsocket_address_bsd);
2130         struct tevent_req *req;
2131         int sys_errno = 0;
2132
2133         switch (lbsda->u.sa.sa_family) {
2134         case AF_INET:
2135                 break;
2136 #ifdef HAVE_IPV6
2137         case AF_INET6:
2138                 break;
2139 #endif
2140         default:
2141                 sys_errno = EINVAL;
2142                 break;
2143         }
2144
2145         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2146
2147         return req;
2148 }
2149
2150 int _tstream_inet_tcp_connect_recv(struct tevent_req *req,
2151                                    int *perrno,
2152                                    TALLOC_CTX *mem_ctx,
2153                                    struct tstream_context **stream,
2154                                    const char *location)
2155 {
2156         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2157 }
2158
2159 struct tevent_req * tstream_unix_connect_send(TALLOC_CTX *mem_ctx,
2160                                         struct tevent_context *ev,
2161                                         const struct tsocket_address *local,
2162                                         const struct tsocket_address *remote)
2163 {
2164         struct tsocket_address_bsd *lbsda =
2165                 talloc_get_type_abort(local->private_data,
2166                 struct tsocket_address_bsd);
2167         struct tevent_req *req;
2168         int sys_errno = 0;
2169
2170         switch (lbsda->u.sa.sa_family) {
2171         case AF_UNIX:
2172                 break;
2173         default:
2174                 sys_errno = EINVAL;
2175                 break;
2176         }
2177
2178         req = tstream_bsd_connect_send(mem_ctx, ev, sys_errno, local, remote);
2179
2180         return req;
2181 }
2182
2183 int _tstream_unix_connect_recv(struct tevent_req *req,
2184                                       int *perrno,
2185                                       TALLOC_CTX *mem_ctx,
2186                                       struct tstream_context **stream,
2187                                       const char *location)
2188 {
2189         return tstream_bsd_connect_recv(req, perrno, mem_ctx, stream, location);
2190 }
2191
2192 int _tstream_unix_socketpair(TALLOC_CTX *mem_ctx1,
2193                              struct tstream_context **_stream1,
2194                              TALLOC_CTX *mem_ctx2,
2195                              struct tstream_context **_stream2,
2196                              const char *location)
2197 {
2198         int ret;
2199         int fds[2];
2200         int fd1;
2201         int fd2;
2202         struct tstream_context *stream1 = NULL;
2203         struct tstream_context *stream2 = NULL;
2204
2205         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
2206         if (ret == -1) {
2207                 return -1;
2208         }
2209         fd1 = fds[0];
2210         fd2 = fds[1];
2211
2212         fd1 = tsocket_bsd_common_prepare_fd(fd1, true);
2213         if (fd1 == -1) {
2214                 int sys_errno = errno;
2215                 close(fd2);
2216                 errno = sys_errno;
2217                 return -1;
2218         }
2219
2220         fd2 = tsocket_bsd_common_prepare_fd(fd2, true);
2221         if (fd2 == -1) {
2222                 int sys_errno = errno;
2223                 close(fd1);
2224                 errno = sys_errno;
2225                 return -1;
2226         }
2227
2228         ret = _tstream_bsd_existing_socket(mem_ctx1,
2229                                            fd1,
2230                                            &stream1,
2231                                            location);
2232         if (ret == -1) {
2233                 int sys_errno = errno;
2234                 close(fd1);
2235                 close(fd2);
2236                 errno = sys_errno;
2237                 return -1;
2238         }
2239
2240         ret = _tstream_bsd_existing_socket(mem_ctx2,
2241                                            fd2,
2242                                            &stream2,
2243                                            location);
2244         if (ret == -1) {
2245                 int sys_errno = errno;
2246                 talloc_free(stream1);
2247                 close(fd2);
2248                 errno = sys_errno;
2249                 return -1;
2250         }
2251
2252         *_stream1 = stream1;
2253         *_stream2 = stream2;
2254         return 0;
2255 }
2256