2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "lib/util/dlinklist.h"
25 #include "pthreadpool/pthreadpool_pipe.h"
26 #include "lib/util/iov_buf.h"
27 #include "lib/util/msghdr.h"
29 #include "lib/util/time.h"
32 * This file implements two abstractions: The "unix_dgram" functions implement
33 * queueing for unix domain datagram sockets. You can send to a destination
34 * socket, and if that has no free space available, it will fall back to an
35 * anonymous socket that will poll for writability. "unix_dgram" expects the
36 * data size not to exceed the system limit.
38 * The "unix_msg" functions implement the fragmentation of large messages on
39 * top of "unix_dgram". This is what is exposed to the user of this API.
/*
 * A single datagram waiting in a send queue. prev/next are the list
 * linkage used by the DLIST_* macros on a queue's "msgs" list.
 */
42 struct unix_dgram_msg {
43 struct unix_dgram_msg *prev, *next;
/*
 * Per-destination send queue. Queues are chained through prev/next,
 * point back at their owning context via "ctx", keep their pending
 * messages in "msgs" and carry a poll timeout handle in "timeout".
 */
50 struct unix_dgram_send_queue {
51 struct unix_dgram_send_queue *prev, *next;
52 struct unix_dgram_ctx *ctx;
54 struct unix_dgram_msg *msgs;
55 struct poll_timeout *timeout;
/*
 * Context of one unix datagram endpoint.
 */
59 struct unix_dgram_ctx {
/* Event-loop hooks (watch_new/watch_update/watch_free, timeout_new/timeout_free). */
62 const struct poll_funcs *ev_funcs;
/* Invoked by unix_dgram_recv_handler() with the payload and any received fds. */
65 void (*recv_callback)(struct unix_dgram_ctx *ctx,
66 uint8_t *msg, size_t msg_len,
67 int *fds, size_t num_fds,
/* POLLIN watch on the endpoint's socket. */
71 struct poll_watch *sock_read_watch;
/* Per-destination queues for sends that could not complete immediately. */
72 struct unix_dgram_send_queue *send_queues;
/* Thread pool running unix_dgram_send_job(); created on first use. */
74 struct pthreadpool_pipe *send_pool;
/* POLLIN watch on the pool's signal fd; fires unix_dgram_job_finished(). */
75 struct poll_watch *pool_read_watch;
/* Forward declaration: POLLIN handler that unix_dgram_init() registers on the socket. */
81 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
84 /* Set socket non blocking. */
/*
 * Reads the file status flags with F_GETFL, sets or clears FLAG_TO_SET
 * (presumably selected by the "nonblock" argument) and writes the
 * result back with F_SETFL. FLAG_TO_SET is O_NONBLOCK, O_NDELAY or
 * FNDELAY, whichever definition the preprocessor picks.
 */
85 static int prepare_socket_nonblock(int sock, bool nonblock)
89 #define FLAG_TO_SET O_NONBLOCK
92 #define FLAG_TO_SET O_NDELAY
94 #define FLAG_TO_SET FNDELAY
98 flags = fcntl(sock, F_GETFL);
103 flags |= FLAG_TO_SET;
105 flags &= ~FLAG_TO_SET;
/* fcntl() reports failure with -1. */
107 if (fcntl(sock, F_SETFL, flags) == -1) {
115 /* Set socket close on exec. */
/*
 * Fetches the descriptor flags with F_GETFD and stores them back with
 * F_SETFD; a failing F_SETFD is detected by its -1 return value.
 */
116 static int prepare_socket_cloexec(int sock)
121 flags = fcntl(sock, F_GETFD, 0);
126 if (fcntl(sock, F_SETFD, flags) == -1) {
133 /* Set socket non blocking and close on exec. */
/*
 * Applies prepare_socket_nonblock(sock, true) first; the return value
 * of the final prepare_socket_cloexec() call is passed on to the caller.
 */
134 static int prepare_socket(int sock)
136 int ret = prepare_socket_nonblock(sock, true);
141 return prepare_socket_cloexec(sock);
/*
 * Size of struct unix_dgram_msg rounded up to the next multiple of 16.
 * unix_dgram_msghdr() uses this as the offset of the data that trails
 * the message header.
 */
144 static size_t unix_dgram_msg_size(void)
146 size_t msgsize = sizeof(struct unix_dgram_msg);
147 msgsize = (msgsize + 15) & ~15; /* align to 16 */
/*
 * Returns the msghdr_buf located unix_dgram_msg_size() bytes behind the
 * start of "msg", i.e. directly after the (padded) message header.
 */
151 static struct msghdr_buf *unix_dgram_msghdr(struct unix_dgram_msg *msg)
154 * Not portable in C99, but "msg" is aligned and so is
155 * unix_dgram_msg_size()
157 return (struct msghdr_buf *)(((char *)msg) + unix_dgram_msg_size());
/*
 * Iterates over the num_fds descriptors in "fds" (to close them, going
 * by the function name and its use on error/cleanup paths).
 */
160 static void close_fd_array(int *fds, size_t num_fds)
164 for (i = 0; i < num_fds; i++) {
/*
 * Hands the file descriptors attached to a queued message over to
 * close_fd_array().
 */
174 static void close_fd_array_dgram_msg(struct unix_dgram_msg *dmsg)
176 struct msghdr_buf *hdr = unix_dgram_msghdr(dmsg);
177 struct msghdr *msg = msghdr_buf_msghdr(hdr);
/* First call with a NULL array only yields the number of fds ... */
178 size_t num_fds = msghdr_extract_fds(msg, NULL, 0);
/* ... the second call copies them into "fds". */
181 msghdr_extract_fds(msg, fds, num_fds);
183 close_fd_array(fds, num_fds);
/*
 * Create a unix datagram endpoint bound to "addr".
 *
 * The context is allocated with room for a copy of addr->sun_path, a
 * receive buffer of max_msg bytes is set up, and a new AF_UNIX
 * datagram socket is made non-blocking and close-on-exec before
 * bind() is called on it. Finally a POLLIN watch with
 * unix_dgram_recv_handler() is registered through ev_funcs.
 *
 * recv_callback and private_data are stored in the context. "result"
 * is presumably the out-parameter receiving the new context.
 */
186 static int unix_dgram_init(const struct sockaddr_un *addr, size_t max_msg,
187 const struct poll_funcs *ev_funcs,
188 void (*recv_callback)(struct unix_dgram_ctx *ctx,
189 uint8_t *msg, size_t msg_len,
190 int *fds, size_t num_fds,
193 struct unix_dgram_ctx **result)
195 struct unix_dgram_ctx *ctx;
/* Length of the socket path including the terminating NUL. */
200 pathlen = strlen(addr->sun_path)+1;
/* The path copy lives at the tail of the context, in the "path" member. */
205 ctx = malloc(offsetof(struct unix_dgram_ctx, path) + pathlen);
210 memcpy(ctx->path, addr->sun_path, pathlen);
/* created_pid starts out as (pid_t)-1 and is only set after bind(). */
215 *ctx = (struct unix_dgram_ctx) {
217 .ev_funcs = ev_funcs,
218 .recv_callback = recv_callback,
219 .private_data = private_data,
220 .created_pid = (pid_t)-1
223 ctx->recv_buf = malloc(max_msg);
224 if (ctx->recv_buf == NULL) {
229 ctx->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
230 if (ctx->sock == -1) {
235 /* Set non-blocking and close-on-exec. */
236 ret = prepare_socket(ctx->sock);
242 ret = bind(ctx->sock,
243 (const struct sockaddr *)(const void *)addr,
/* Remember the binding process; unix_dgram_free() compares against getpid(). */
250 ctx->created_pid = getpid();
252 ctx->sock_read_watch = ctx->ev_funcs->watch_new(
253 ctx->ev_funcs, ctx->sock, POLLIN,
254 unix_dgram_recv_handler, ctx);
256 if (ctx->sock_read_watch == NULL) {
/*
 * POLLIN handler for the datagram socket: reads one datagram together
 * with any passed file descriptors and forwards both to
 * ctx->recv_callback.
 */
273 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
276 struct unix_dgram_ctx *ctx = (struct unix_dgram_ctx *)private_data;
/* Control-message buffer sized by msghdr_prep_recv_fds() for INT8_MAX fds. */
281 size_t bufsize = msghdr_prep_recv_fds(NULL, NULL, 0, INT8_MAX);
282 uint8_t buf[bufsize];
/* The payload is received into the context's preallocated buffer. */
284 iov = (struct iovec) {
285 .iov_base = (void *)ctx->recv_buf,
286 .iov_len = ctx->max_msg,
289 msg = (struct msghdr) {
294 msghdr_prep_recv_fds(&msg, buf, bufsize, INT8_MAX);
/* Where available, ask the kernel to mark received fds close-on-exec. */
296 #ifdef MSG_CMSG_CLOEXEC
297 flags |= MSG_CMSG_CLOEXEC;
300 received = recvmsg(fd, &msg, flags);
301 if (received == -1) {
302 if ((errno == EAGAIN) ||
303 (errno == EWOULDBLOCK) ||
304 (errno == EINTR) || (errno == ENOMEM)) {
305 /* Not really an error - just try again. */
308 /* Problem with the socket. Set it unreadable. */
309 ctx->ev_funcs->watch_update(w, 0);
312 if (received > ctx->max_msg) {
313 /* More than we expected, not for us */
/* Query the number of received fds first, then fetch them. */
318 size_t num_fds = msghdr_extract_fds(&msg, NULL, 0);
322 msghdr_extract_fds(&msg, fds, num_fds);
/*
 * Every received fd gets close-on-exec applied explicitly;
 * close_fd_array() is presumably the cleanup when that fails.
 */
324 for (i = 0; i < num_fds; i++) {
327 err = prepare_socket_cloexec(fds[i]);
329 close_fd_array(fds, num_fds);
334 ctx->recv_callback(ctx, ctx->recv_buf, received,
335 fds, num_fds, ctx->private_data);
/* Forward declaration: POLLIN handler registered on the thread pool's signal fd. */
339 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
/*
 * Set up the send thread pool of "ctx" and watch the pool's signal fd
 * for POLLIN so that finished jobs reach unix_dgram_job_finished().
 */
342 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx *ctx)
/* An already existing pool is detected here. */
346 if (ctx->send_pool != NULL) {
350 ret = pthreadpool_pipe_init(0, &ctx->send_pool);
355 signalfd = pthreadpool_pipe_signal_fd(ctx->send_pool);
357 ctx->pool_read_watch = ctx->ev_funcs->watch_new(
358 ctx->ev_funcs, signalfd, POLLIN,
359 unix_dgram_job_finished, ctx);
/* No watch could be created: destroy the pool again and reset the pointer. */
360 if (ctx->pool_read_watch == NULL) {
361 pthreadpool_pipe_destroy(ctx->send_pool);
362 ctx->send_pool = NULL;
/* Forward declaration: needed by unix_dgram_send_queue_init(), which is defined first. */
369 static int unix_dgram_sendq_schedule_free(struct unix_dgram_send_queue *q);
/*
 * Create a send queue for destination "dst" and link it into
 * ctx->send_queues.
 *
 * The queue gets its own AF_UNIX datagram socket, marked close-on-exec
 * and connected to "dst" (connect() is retried while it fails with
 * EINTR). The context's thread pool is initialised along the way, and
 * unix_dgram_sendq_schedule_free() is called for the new queue.
 * "result" is presumably the out-parameter receiving the queue.
 */
371 static int unix_dgram_send_queue_init(
372 struct unix_dgram_ctx *ctx, const struct sockaddr_un *dst,
373 struct unix_dgram_send_queue **result)
375 struct unix_dgram_send_queue *q;
/* Path length including the terminating NUL; the copy sits at the tail of q. */
379 pathlen = strlen(dst->sun_path)+1;
381 q = malloc(offsetof(struct unix_dgram_send_queue, path) + pathlen);
388 memcpy(q->path, dst->sun_path, pathlen);
390 q->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
396 err = prepare_socket_cloexec(q->sock);
402 ret = connect(q->sock,
403 (const struct sockaddr *)(const void *)dst,
405 } while ((ret == -1) && (errno == EINTR));
412 err = unix_dgram_init_pthreadpool(ctx);
417 DLIST_ADD(ctx->send_queues, q);
419 ret = unix_dgram_sendq_schedule_free(q);
/*
 * Tear down a send queue: every message still queued is unlinked and
 * the fds attached to it are passed to close_fd_array_dgram_msg(); then
 * the queue is removed from the context's list and its timeout is
 * released through ev_funcs->timeout_free().
 */
435 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue *q)
437 struct unix_dgram_ctx *ctx = q->ctx;
439 while (q->msgs != NULL) {
440 struct unix_dgram_msg *msg;
442 DLIST_REMOVE(q->msgs, msg);
443 close_fd_array_dgram_msg(msg);
447 DLIST_REMOVE(ctx->send_queues, q);
448 ctx->ev_funcs->timeout_free(q->timeout);
/* Forward declaration: timeout callback installed by unix_dgram_sendq_schedule_free(). */
452 static void unix_dgram_sendq_scheduled_free_handler(
453 struct poll_timeout *t, void *private_data);
/*
 * Arm the queue's timeout SENDQ_CACHE_TIME_SECS seconds from now, with
 * unix_dgram_sendq_scheduled_free_handler() as the callback. If the
 * timeout cannot be created the queue is freed.
 */
455 static int unix_dgram_sendq_schedule_free(struct unix_dgram_send_queue *q)
457 struct unix_dgram_ctx *ctx = q->ctx;
458 struct timeval timeout;
/* An already armed timeout is checked for before a new one is created. */
460 if (q->timeout != NULL) {
464 GetTimeOfDay(&timeout);
465 timeout.tv_sec += SENDQ_CACHE_TIME_SECS;
467 q->timeout = ctx->ev_funcs->timeout_new(
470 unix_dgram_sendq_scheduled_free_handler,
472 if (q->timeout == NULL) {
473 unix_dgram_send_queue_free(q);
/*
 * Timeout callback for a send queue; private_data is the queue.
 *
 * The expired timeout is released. A queue without pending messages is
 * freed; for a queue that still has messages the timeout is presumably
 * re-armed via unix_dgram_sendq_schedule_free(), with the queue being
 * freed if that fails.
 */
480 static void unix_dgram_sendq_scheduled_free_handler(struct poll_timeout *t,
483 struct unix_dgram_send_queue *q = private_data;
486 q->ctx->ev_funcs->timeout_free(q->timeout);
489 if (q->msgs == NULL) {
490 unix_dgram_send_queue_free(q);
494 ret = unix_dgram_sendq_schedule_free(q);
496 unix_dgram_send_queue_free(q);
/*
 * Walk ctx->send_queues looking for the queue whose path equals
 * dst->sun_path. "ps" is presumably the out-parameter for the match.
 */
501 static int find_send_queue(struct unix_dgram_ctx *ctx,
502 const struct sockaddr_un *dst,
503 struct unix_dgram_send_queue **ps)
505 struct unix_dgram_send_queue *s;
507 for (s = ctx->send_queues; s != NULL; s = s->next) {
508 if (strcmp(s->path, dst->sun_path) == 0) {
/*
 * Append a copy of the message described by iov/iovcnt and fds to the
 * end of send queue "q".
 *
 * The fds are duplicated with dup(). msghdr_copy() is called twice:
 * once with a NULL destination to obtain the required length, and once
 * to fill the area that follows the unix_dgram_msg header inside a
 * single malloc()ed block.
 */
516 static int queue_msg(struct unix_dgram_send_queue *q,
517 const struct iovec *iov, int iovcnt,
518 const int *fds, size_t num_fds)
520 struct unix_dgram_msg *msg;
521 struct msghdr_buf *hdr;
522 size_t msglen, needed;
/* The local fd copy is capped at INT8_MAX entries. */
524 int fds_copy[MIN(num_fds, INT8_MAX)];
527 for (i=0; i<num_fds; i++) {
531 for (i = 0; i < num_fds; i++) {
532 fds_copy[i] = dup(fds[i]);
533 if (fds_copy[i] == -1) {
539 msglen = unix_dgram_msg_size();
/* Sizing pass: no destination buffer is given. */
541 msghdrlen = msghdr_copy(NULL, 0, NULL, 0, iov, iovcnt,
543 if (msghdrlen == -1) {
/* Guard against size_t wrap-around of msglen + msghdrlen. */
548 needed = msglen + msghdrlen;
549 if (needed < msglen) {
554 msg = malloc(needed);
559 hdr = unix_dgram_msghdr(msg);
562 msghdr_copy(hdr, msghdrlen, NULL, 0, iov, iovcnt,
565 DLIST_ADD_END(q->msgs, msg);
/* Presumably the failure path: the duplicated descriptors are dropped. */
568 close_fd_array(fds_copy, num_fds);
/*
 * Job function handed to pthreadpool_pipe_add_job(); private_data is
 * the unix_dgram_msg to send.
 *
 * sendmsg() is called on dmsg->sock and repeated while it fails with
 * EINTR. The result is left in dmsg->sent; on failure errno is saved
 * in dmsg->sys_errno.
 */
572 static void unix_dgram_send_job(void *private_data)
574 struct unix_dgram_msg *dmsg = private_data;
577 struct msghdr_buf *hdr = unix_dgram_msghdr(dmsg);
578 struct msghdr *msg = msghdr_buf_msghdr(hdr);
579 dmsg->sent = sendmsg(dmsg->sock, msg, 0);
580 } while ((dmsg->sent == -1) && (errno == EINTR));
582 if (dmsg->sent == -1) {
583 dmsg->sys_errno = errno;
/*
 * POLLIN handler for the thread pool's signal fd; private_data is the
 * datagram context.
 *
 * One finished job id is collected. Jobs are submitted with the
 * queue's socket as their id, so the owning send queue is found by
 * comparing the id against q->sock. The finished message is unlinked
 * and its fds are closed; if more messages are pending the next one
 * (q->msgs) is submitted to the pool.
 */
587 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
590 struct unix_dgram_ctx *ctx = private_data;
591 struct unix_dgram_send_queue *q;
592 struct unix_dgram_msg *msg;
595 ret = pthreadpool_pipe_finished_jobs(ctx->send_pool, &job, 1);
600 for (q = ctx->send_queues; q != NULL; q = q->next) {
601 if (job == q->sock) {
607 /* Huh? Should not happen */
612 DLIST_REMOVE(q->msgs, msg);
613 close_fd_array_dgram_msg(msg);
616 if (q->msgs != NULL) {
617 ret = pthreadpool_pipe_add_job(ctx->send_pool, q->sock,
618 unix_dgram_send_job, q->msgs);
/* Presumably reached when the job could not be submitted. */
620 unix_dgram_send_queue_free(q);
/*
 * Send one datagram (iov/iovlen plus optional fds) to "dst".
 *
 * num_fds is checked against INT8_MAX and every fd is probed with
 * lseek() (see the comments below for the intent). If a send queue for
 * "dst" is found, the message is appended to it. Otherwise a direct
 * sendmsg() on ctx->sock is attempted; depending on the errno of a
 * failed attempt a new send queue is created, the message is queued on
 * it and a thread-pool job running unix_dgram_send_job() is submitted.
 */
627 static int unix_dgram_send(struct unix_dgram_ctx *ctx,
628 const struct sockaddr_un *dst,
629 const struct iovec *iov, int iovlen,
630 const int *fds, size_t num_fds)
632 struct unix_dgram_send_queue *q;
638 if (num_fds > INT8_MAX) {
/* Configuration without msg_control or msg_accrights support in struct msghdr. */
642 #if !defined(HAVE_STRUCT_MSGHDR_MSG_CONTROL) && !defined(HAVE_STRUCT_MSGHDR_MSG_ACCRIGHTS)
648 for (i = 0; i < num_fds; i++) {
650 * Make sure we only allow fd passing
651 * for communication channels,
652 * e.g. sockets, pipes, fifos, ...
/* lseek() failing with ESPIPE identifies a non-seekable descriptor. */
654 ret = lseek(fds[i], 0, SEEK_CUR);
655 if (ret == -1 && errno == ESPIPE) {
661 * Reject the message as we may need to call dup(),
662 * if we queue the message.
664 * That might result in unexpected behavior for the caller
665 * for files and broken posix locking.
671 * To preserve message ordering, we have to queue a message when
672 * others are waiting in line already.
674 ret = find_send_queue(ctx, dst, &q);
676 return queue_msg(q, iov, iovlen, fds, num_fds);
680 * Try a cheap nonblocking send
683 msg = (struct msghdr) {
684 .msg_name = discard_const_p(struct sockaddr_un, dst),
685 .msg_namelen = sizeof(*dst),
686 .msg_iov = discard_const_p(struct iovec, iov),
/* Sizing call with a NULL buffer; the second call below fills "buf". */
690 fdlen = msghdr_prep_fds(&msg, NULL, 0, fds, num_fds);
697 msghdr_prep_fds(&msg, buf, fdlen, fds, num_fds);
699 ret = sendmsg(ctx->sock, &msg, 0);
705 if ((errno != EWOULDBLOCK) &&
708 /* FreeBSD can give this for large messages */
709 (errno != ENOBUFS) &&
715 ret = unix_dgram_send_queue_init(ctx, dst, &q);
719 ret = queue_msg(q, iov, iovlen, fds, num_fds);
721 unix_dgram_send_queue_free(q);
/* Start sending from the head of the queue; the queue's socket is the job id. */
724 ret = pthreadpool_pipe_add_job(ctx->send_pool, q->sock,
725 unix_dgram_send_job, q->msgs);
727 unix_dgram_send_queue_free(q);
/* Accessor on the datagram context; unix_msg_send() stores its result in the fragment header's "sock" field. */
733 static int unix_dgram_sock(struct unix_dgram_ctx *ctx)
/*
 * Release a datagram context.
 *
 * Remaining send queues are checked for first. An existing thread pool
 * is destroyed and its watch freed, then the socket read watch is
 * freed. Cleanup of the socket path is limited to the process recorded
 * in ctx->created_pid (see the comment below).
 */
738 static int unix_dgram_free(struct unix_dgram_ctx *ctx)
740 if (ctx->send_queues != NULL) {
744 if (ctx->send_pool != NULL) {
745 int ret = pthreadpool_pipe_destroy(ctx->send_pool);
749 ctx->ev_funcs->watch_free(ctx->pool_read_watch);
752 ctx->ev_funcs->watch_free(ctx->sock_read_watch);
755 if (getpid() == ctx->created_pid) {
756 /* If we created it, unlink. Otherwise someone else might
757 * still have it open */
767 * Every message starts with a uint64_t cookie.
769 * A value of 0 indicates a single-fragment message which is complete in
770 * itself. The data immediately follows the cookie.
772 * Every multi-fragment message has a cookie != 0 and starts with a cookie
773 * followed by a struct unix_msg_hdr and then the data. The pid and sock
774 * fields are used to assure uniqueness on the receiver side.
/*
 * Header that follows the cookie in each fragment of a multi-fragment
 * message; unix_msg_recv() reads its msglen, pid and sock fields.
 */
777 struct unix_msg_hdr {
/* List linkage of a message under reassembly; such messages hang off unix_msg_ctx.msgs. */
784 struct unix_msg *prev, *next;
/*
 * Context of the fragmenting message layer: the underlying datagram
 * endpoint, the user's receive callback, and the list of messages that
 * are still being reassembled.
 */
793 struct unix_msg_ctx {
794 struct unix_dgram_ctx *dgram;
798 void (*recv_callback)(struct unix_msg_ctx *ctx,
799 uint8_t *msg, size_t msg_len,
800 int *fds, size_t num_fds,
804 struct unix_msg *msgs;
/* Forward declaration: unix_msg_init() passes this to unix_dgram_init() as the datagram receive callback. */
807 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
808 uint8_t *buf, size_t buflen,
809 int *fds, size_t num_fds,
/*
 * Create a message context on top of a new unix_dgram endpoint bound
 * to "addr".
 *
 * fragment_len is stored in the context and also passed to
 * unix_dgram_init() as the maximum datagram size. unix_msg_recv() is
 * installed as the datagram receive callback with the new context as
 * its private data; the caller's recv_callback and private_data are
 * kept in the context for message delivery. "result" is presumably the
 * out-parameter receiving the new context.
 */
812 int unix_msg_init(const struct sockaddr_un *addr,
813 const struct poll_funcs *ev_funcs,
815 void (*recv_callback)(struct unix_msg_ctx *ctx,
816 uint8_t *msg, size_t msg_len,
817 int *fds, size_t num_fds,
820 struct unix_msg_ctx **result)
822 struct unix_msg_ctx *ctx;
825 ctx = malloc(sizeof(*ctx));
830 *ctx = (struct unix_msg_ctx) {
831 .fragment_len = fragment_len,
833 .recv_callback = recv_callback,
834 .private_data = private_data
837 ret = unix_dgram_init(addr, fragment_len, ev_funcs,
838 unix_msg_recv, ctx, &ctx->dgram);
/*
 * Send a message to "dst", fragmenting it when it does not fit into a
 * single datagram.
 *
 * A message of at most fragment_len - sizeof(uint64_t) bytes goes out
 * as one datagram: a cookie followed by the caller's iovecs. Larger
 * messages are split; every fragment starts with ctx->cookie and a
 * struct unix_msg_hdr, followed by as much payload as still fits into
 * fragment_len. num_fds is checked against INT8_MAX.
 */
848 int unix_msg_send(struct unix_msg_ctx *ctx, const struct sockaddr_un *dst,
849 const struct iovec *iov, int iovlen,
850 const int *fds, size_t num_fds)
/* Two extra slots: one for the cookie, one for the fragment header. */
855 struct iovec iov_copy[iovlen+2];
856 struct unix_msg_hdr hdr;
857 struct iovec src_iov;
863 msglen = iov_buflen(iov, iovlen);
868 if (num_fds > INT8_MAX) {
/* Single-datagram case: cookie + payload fit into one fragment. */
872 if (msglen <= (ctx->fragment_len - sizeof(uint64_t))) {
875 iov_copy[0].iov_base = &cookie;
876 iov_copy[0].iov_len = sizeof(cookie);
878 memcpy(&iov_copy[1], iov,
879 sizeof(struct iovec) * iovlen);
882 return unix_dgram_send(ctx->dgram, dst, iov_copy, iovlen+1,
886 hdr = (struct unix_msg_hdr) {
889 .sock = unix_dgram_sock(ctx->dgram)
892 iov_copy[0].iov_base = &ctx->cookie;
893 iov_copy[0].iov_len = sizeof(ctx->cookie);
894 iov_copy[1].iov_base = &hdr;
895 iov_copy[1].iov_len = sizeof(hdr);
901 * The following write loop sends the user message in pieces. We have
902 * filled the first two iovecs above with "cookie" and "hdr". In the
903 * following loops we pull message chunks from the user iov array and
904 * fill iov_copy piece by piece, possibly truncating chunks from the
905 * caller's iov array. Ugly, but hopefully efficient.
908 while (sent < msglen) {
/* Slots 0 and 1 are taken by cookie and header. */
910 size_t iov_index = 2;
912 fragment_len = sizeof(ctx->cookie) + sizeof(hdr);
914 while (fragment_len < ctx->fragment_len) {
917 space = ctx->fragment_len - fragment_len;
918 chunk = MIN(space, src_iov.iov_len);
920 iov_copy[iov_index].iov_base = src_iov.iov_base;
921 iov_copy[iov_index].iov_len = chunk;
/* Advance within the caller's current iovec by what was consumed. */
924 src_iov.iov_base = (char *)src_iov.iov_base + chunk;
925 src_iov.iov_len -= chunk;
926 fragment_len += chunk;
928 if (src_iov.iov_len == 0) {
/* Only payload bytes count towards "sent". */
937 sent += (fragment_len - sizeof(ctx->cookie) - sizeof(hdr));
940 * only the last fragment should pass the fd array.
941 * That simplifies the receiver a lot.
944 ret = unix_dgram_send(ctx->dgram, dst,
948 ret = unix_dgram_send(ctx->dgram, dst,
/* Cookie value 0 is reserved for single-fragment messages, hence the special case. */
958 if (ctx->cookie == 0) {
/*
 * Datagram receive callback of the message layer; private_data is the
 * unix_msg_ctx.
 *
 * The leading uint64_t cookie is stripped first; one path hands the
 * remaining buffer straight to ctx->recv_callback. For fragments the
 * following struct unix_msg_hdr identifies the sender (pid, sock). A
 * message under reassembly from the same sender but with a different
 * cookie is removed from the list. A reassembly buffer of hdr.msglen
 * bytes is allocated for a new message, the fragment payload is
 * appended, and once the message is complete it is unlinked and passed
 * to ctx->recv_callback together with the fds.
 */
965 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
966 uint8_t *buf, size_t buflen,
967 int *fds, size_t num_fds,
970 struct unix_msg_ctx *ctx = (struct unix_msg_ctx *)private_data;
971 struct unix_msg_hdr hdr;
972 struct unix_msg *msg;
/* Datagrams too short to hold a cookie are detected here. */
976 if (buflen < sizeof(cookie)) {
980 memcpy(&cookie, buf, sizeof(cookie));
982 buf += sizeof(cookie);
983 buflen -= sizeof(cookie);
986 ctx->recv_callback(ctx, buf, buflen, fds, num_fds,
991 if (buflen < sizeof(hdr)) {
/* Copied out rather than cast, so alignment of buf does not matter. */
994 memcpy(&hdr, buf, sizeof(hdr));
997 buflen -= sizeof(hdr);
/* Look for a message already under reassembly from this sender. */
999 for (msg = ctx->msgs; msg != NULL; msg = msg->next) {
1000 if ((msg->sender_pid == hdr.pid) &&
1001 (msg->sender_sock == hdr.sock)) {
/* Same sender, different cookie: the old partial message is dropped from the list. */
1006 if ((msg != NULL) && (msg->cookie != cookie)) {
1007 DLIST_REMOVE(ctx->msgs, msg);
1013 msg = malloc(offsetof(struct unix_msg, buf) + hdr.msglen);
1017 *msg = (struct unix_msg) {
1018 .msglen = hdr.msglen,
1019 .sender_pid = hdr.pid,
1020 .sender_sock = hdr.sock,
1023 DLIST_ADD(ctx->msgs, msg);
/* A fragment carrying more than the remaining space is checked for. */
1026 space = msg->msglen - msg->received;
1027 if (buflen > space) {
1031 memcpy(msg->buf + msg->received, buf, buflen);
1032 msg->received += buflen;
/* Still incomplete: more fragments are expected. */
1034 if (msg->received < msg->msglen) {
1038 DLIST_REMOVE(ctx->msgs, msg);
1039 ctx->recv_callback(ctx, msg->buf, msg->msglen, fds, num_fds,
/* Presumably the paths that do not deliver the fds end up here. */
1045 close_fd_array(fds, num_fds);
1048 int unix_msg_free(struct unix_msg_ctx *ctx)
1052 ret = unix_dgram_free(ctx->dgram);
1057 while (ctx->msgs != NULL) {
1058 struct unix_msg *msg = ctx->msgs;
1059 DLIST_REMOVE(ctx->msgs, msg);