2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
26 #include "lib/iov_buf.h"
30 * This file implements two abstractions: The "unix_dgram" functions implement
31 * queueing for unix domain datagram sockets. You can send to a destination
32 * socket, and if that has no free space available, the message is queued and
33 * sent from a thread pool over a separate per-destination socket. "unix_dgram" expects the
34 * data size not to exceed the system limit.
36 * The "unix_msg" functions implement the fragmentation of large messages on
37 * top of "unix_dgram". This is what is exposed to the user of this API.
/*
 * One datagram waiting in a send queue. Entries are chained through
 * prev/next into the msgs list of a struct unix_dgram_send_queue.
 * Besides the list linkage shown here, code in this file also accesses
 * members named sock, sent, sys_errno, msg and iov.
 */
40 struct unix_dgram_msg {
41 struct unix_dgram_msg *prev, *next;
/*
 * Queue of pending datagrams for one destination. Queues are linked into
 * ctx->send_queues and are found again by comparing their path member
 * against the destination socket path (see find_send_queue()).
 */
50 struct unix_dgram_send_queue {
51 struct unix_dgram_send_queue *prev, *next;
/* Context this queue belongs to; its send_queues list links the queue. */
52 struct unix_dgram_ctx *ctx;
/* Messages still to be sent; queue_msg() appends at the end. */
54 struct unix_dgram_msg *msgs;
/*
 * State of one bound unix datagram socket together with its send queues.
 */
58 struct unix_dgram_ctx {
/* Event-loop hooks used to create, update and free poll watches. */
61 const struct poll_funcs *ev_funcs;
/* Called by unix_dgram_recv_handler() to deliver a received datagram. */
64 void (*recv_callback)(struct unix_dgram_ctx *ctx,
65 uint8_t *msg, size_t msg_len,
66 int *fds, size_t num_fds,
/* POLLIN watch on the bound socket, created in unix_dgram_init(). */
70 struct poll_watch *sock_read_watch;
/* List of per-destination queues holding messages that had to be queued. */
71 struct unix_dgram_send_queue *send_queues;
/* Thread pool running unix_dgram_send_job(); set up on demand. */
73 struct pthreadpool *send_pool;
/* POLLIN watch on the pool's signal fd; fires unix_dgram_job_finished(). */
74 struct poll_watch *pool_read_watch;
/*
 * Forward declaration: unix_dgram_init() registers this handler before
 * its definition further down in the file.
 */
80 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
83 /*
 * Set socket non blocking.
 *
 * Reads the descriptor's status flags with fcntl(F_GETFL), ORs in
 * FLAG_TO_SET and stores them with fcntl(F_SETFL). FLAG_TO_SET is defined
 * as O_NONBLOCK, O_NDELAY or FNDELAY.
 * NOTE(review): the conditionals choosing between the three definitions
 * and the return statements are not visible in this view.
 */
84 static int prepare_socket_nonblock(int sock)
88 #define FLAG_TO_SET O_NONBLOCK
91 #define FLAG_TO_SET O_NDELAY
93 #define FLAG_TO_SET FNDELAY
97 flags = fcntl(sock, F_GETFL);
101 flags |= FLAG_TO_SET;
102 if (fcntl(sock, F_SETFL, flags) == -1) {
110 /*
 * Set socket close on exec.
 *
 * Fetches the descriptor flags with fcntl(F_GETFD) and writes them back
 * with fcntl(F_SETFD).
 * NOTE(review): the statement that modifies flags in between (presumably
 * adding FD_CLOEXEC) and the return values are not visible here.
 */
111 static int prepare_socket_cloexec(int sock)
116 flags = fcntl(sock, F_GETFD, 0);
121 if (fcntl(sock, F_SETFD, flags) == -1) {
128 /*
 * Set socket non blocking and close on exec.
 *
 * Runs prepare_socket_nonblock() first and finishes by returning the
 * result of prepare_socket_cloexec().
 * NOTE(review): the handling of a failed first step is not visible here.
 */
129 static int prepare_socket(int sock)
131 int ret = prepare_socket_nonblock(sock);
136 return prepare_socket_cloexec(sock);
/*
 * Find the file descriptors passed along with a message.
 *
 * Walks the control messages of msg with CMSG_FIRSTHDR()/CMSG_NXTHDR(),
 * tests each one for type SCM_RIGHTS and level SOL_SOCKET, and derives
 * the number of descriptors from the payload length
 * (cmsg_len minus the header length CMSG_LEN(0), divided by sizeof(int)).
 * The scanning code is only compiled when HAVE_STRUCT_MSGHDR_MSG_CONTROL
 * is defined.
 *
 * msg:      message header whose control data is inspected
 * fds:      out parameter for the descriptor array
 * num_fds:  out parameter for the number of descriptors (written below;
 *           the parameter line itself is not visible in this view)
 *
 * NOTE(review): what happens for non-matching control messages and when
 * no descriptors are present is not visible here.
 */
139 static void extract_fd_array_from_msghdr(struct msghdr *msg, int **fds,
142 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
143 struct cmsghdr *cmsg;
145 for(cmsg = CMSG_FIRSTHDR(msg);
147 cmsg = CMSG_NXTHDR(msg, cmsg))
149 void *data = CMSG_DATA(cmsg);
151 if (cmsg->cmsg_type != SCM_RIGHTS) {
154 if (cmsg->cmsg_level != SOL_SOCKET) {
159 *num_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof (int);
/*
 * Walk an array of num_fds file descriptors.
 * NOTE(review): the loop body is not visible here. A caller comment in
 * unix_dgram_recv_handler() ("Close those fds that the callback has not
 * set to -1") suggests that entries equal to -1 are skipped and all
 * others are closed - confirm.
 */
165 static void close_fd_array(int *fds, size_t num_fds)
169 for (i = 0; i < num_fds; i++) {
/*
 * Dispose of the file descriptors attached to msg: the fd array is looked
 * up with extract_fd_array_from_msghdr() and passed to close_fd_array().
 */
179 static void close_fd_array_cmsg(struct msghdr *msg)
184 extract_fd_array_from_msghdr(msg, &fds, &num_fds);
187 * TODO: caveat - side-effect - changing msg ???
189 close_fd_array(fds, num_fds);
/*
 * Create a datagram context bound to addr.
 *
 * Steps visible here: allocate the context with room for a copy of
 * addr->sun_path at its end, record the callbacks, allocate a receive
 * buffer of max_msg bytes, create an AF_UNIX/SOCK_DGRAM socket, make it
 * non-blocking and close-on-exec, bind it to addr, remember the pid of
 * the binding process and register a POLLIN watch that dispatches to
 * unix_dgram_recv_handler().
 *
 * addr:          local address to bind to
 * max_msg:       size of the receive buffer
 * ev_funcs:      event-loop hooks used for the watches
 * recv_callback: receives every datagram together with passed fds
 * result:        out parameter for the new context
 *
 * NOTE(review): error handling, cleanup paths and return values are not
 * visible in this view.
 */
192 static int unix_dgram_init(const struct sockaddr_un *addr, size_t max_msg,
193 const struct poll_funcs *ev_funcs,
194 void (*recv_callback)(struct unix_dgram_ctx *ctx,
195 uint8_t *msg, size_t msg_len,
196 int *fds, size_t num_fds,
199 struct unix_dgram_ctx **result)
201 struct unix_dgram_ctx *ctx;
206 pathlen = strlen(addr->sun_path)+1;
/* The socket path is stored inline at the end of the context. */
211 ctx = malloc(offsetof(struct unix_dgram_ctx, path) + pathlen);
216 memcpy(ctx->path, addr->sun_path, pathlen);
221 *ctx = (struct unix_dgram_ctx) {
223 .ev_funcs = ev_funcs,
224 .recv_callback = recv_callback,
225 .private_data = private_data,
226 .created_pid = (pid_t)-1
229 ctx->recv_buf = malloc(max_msg);
230 if (ctx->recv_buf == NULL) {
235 ctx->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
236 if (ctx->sock == -1) {
241 /* Set non-blocking and close-on-exec. */
242 ret = prepare_socket(ctx->sock);
248 ret = bind(ctx->sock,
249 (const struct sockaddr *)(const void *)addr,
/* Remember who bound the socket; unix_dgram_free() compares against it. */
256 ctx->created_pid = getpid();
258 ctx->sock_read_watch = ctx->ev_funcs->watch_new(
259 ctx->ev_funcs, ctx->sock, POLLIN,
260 unix_dgram_recv_handler, ctx);
262 if (ctx->sock_read_watch == NULL) {
/*
 * Poll handler for the bound socket: receive one datagram and deliver it.
 *
 * The datagram is read with recvmsg() into ctx->recv_buf (ctx->max_msg
 * bytes); where control messages are supported a control buffer sized for
 * INT8_MAX descriptors is supplied. EAGAIN, EWOULDBLOCK, EINTR and ENOMEM
 * are treated as "try again"; on any other recvmsg() failure the watch
 * is updated with an empty event mask. A result larger than ctx->max_msg
 * is treated as not meant for this receiver.
 *
 * Descriptors found in the control data get close-on-exec set and are
 * handed to recv_callback together with the payload; afterwards
 * close_fd_array() is run over the array (per the comment below, for the
 * fds the callback has not set to -1).
 *
 * NOTE(review): a second path closes the fd array first and then delivers
 * the payload with no fds; how it is entered is not visible in this view
 * (presumably when prepare_socket_cloexec() fails) - confirm.
 */
279 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
282 struct unix_dgram_ctx *ctx = (struct unix_dgram_ctx *)private_data;
/* Control buffer with room for INT8_MAX passed descriptors. */
287 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
288 char buf[CMSG_SPACE(sizeof(int)*INT8_MAX)] = { 0, };
289 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
291 size_t i, num_fds = 0;
293 iov = (struct iovec) {
294 .iov_base = (void *)ctx->recv_buf,
295 .iov_len = ctx->max_msg,
298 msg = (struct msghdr) {
301 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
303 .msg_controllen = sizeof(buf),
/* Where the flag exists, ask for close-on-exec on received fds. */
307 #ifdef MSG_CMSG_CLOEXEC
308 flags |= MSG_CMSG_CLOEXEC;
311 received = recvmsg(fd, &msg, flags);
312 if (received == -1) {
313 if ((errno == EAGAIN) ||
314 (errno == EWOULDBLOCK) ||
315 (errno == EINTR) || (errno == ENOMEM)) {
316 /* Not really an error - just try again. */
319 /* Problem with the socket. Set it unreadable. */
320 ctx->ev_funcs->watch_update(w, 0);
323 if (received > ctx->max_msg) {
324 /* More than we expected, not for us */
328 extract_fd_array_from_msghdr(&msg, &fds, &num_fds);
330 for (i = 0; i < num_fds; i++) {
/* Done unconditionally, also when MSG_CMSG_CLOEXEC was used above. */
333 err = prepare_socket_cloexec(fds[i]);
339 ctx->recv_callback(ctx, ctx->recv_buf, received,
340 fds, num_fds, ctx->private_data);
343 * Close those fds that the callback has not set to -1.
345 close_fd_array(fds, num_fds);
350 close_fd_array(fds, num_fds);
352 ctx->recv_callback(ctx, ctx->recv_buf, received,
353 NULL, 0, ctx->private_data);
/*
 * Forward declaration: unix_dgram_init_pthreadpool() registers this
 * handler before its definition further down in the file.
 */
356 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
/*
 * Make sure ctx has a send thread pool.
 *
 * ctx->send_pool is checked first (NOTE(review): the body of that check
 * is not visible; presumably an existing pool is reused). Otherwise a
 * pool is created with pthreadpool_init() and its signal fd is watched
 * for POLLIN with unix_dgram_job_finished() as handler. If the watch
 * cannot be created, the pool is destroyed again and ctx->send_pool is
 * reset to NULL.
 */
359 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx *ctx)
363 if (ctx->send_pool != NULL) {
367 ret = pthreadpool_init(0, &ctx->send_pool);
372 signalfd = pthreadpool_signal_fd(ctx->send_pool);
374 ctx->pool_read_watch = ctx->ev_funcs->watch_new(
375 ctx->ev_funcs, signalfd, POLLIN,
376 unix_dgram_job_finished, ctx);
377 if (ctx->pool_read_watch == NULL) {
378 pthreadpool_destroy(ctx->send_pool);
379 ctx->send_pool = NULL;
/*
 * Create a send queue for destination dst and link it into ctx.
 *
 * Steps visible here: allocate the queue with room for a copy of
 * dst->sun_path at its end, create a dedicated AF_UNIX/SOCK_DGRAM socket,
 * set close-on-exec on it (only prepare_socket_cloexec() is called on
 * this socket here, not prepare_socket()), connect it to dst with
 * connect() retried on EINTR, make sure the thread pool exists and add
 * the queue to ctx->send_queues.
 *
 * result: out parameter for the new queue.
 *
 * NOTE(review): error handling, cleanup paths and return values are not
 * visible in this view.
 */
386 static int unix_dgram_send_queue_init(
387 struct unix_dgram_ctx *ctx, const struct sockaddr_un *dst,
388 struct unix_dgram_send_queue **result)
390 struct unix_dgram_send_queue *q;
394 pathlen = strlen(dst->sun_path)+1;
/* The destination path is stored inline at the end of the queue. */
396 q = malloc(offsetof(struct unix_dgram_send_queue, path) + pathlen);
402 memcpy(q->path, dst->sun_path, pathlen);
404 q->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
410 err = prepare_socket_cloexec(q->sock);
416 ret = connect(q->sock,
417 (const struct sockaddr *)(const void *)dst,
419 } while ((ret == -1) && (errno == EINTR));
426 err = unix_dgram_init_pthreadpool(ctx);
431 DLIST_ADD(ctx->send_queues, q);
/*
 * Tear down a send queue: every message still queued is unlinked and the
 * fds attached to it are closed via close_fd_array_cmsg(), then the queue
 * is unlinked from its context's send_queues list.
 * NOTE(review): releasing the message/queue memory and the queue socket
 * is not visible in this view.
 */
443 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue *q)
445 struct unix_dgram_ctx *ctx = q->ctx;
447 while (q->msgs != NULL) {
448 struct unix_dgram_msg *msg;
450 DLIST_REMOVE(q->msgs, msg);
451 close_fd_array_cmsg(&msg->msg);
455 DLIST_REMOVE(ctx->send_queues, q);
/*
 * Look up the send queue whose path equals dst_sock (exact strcmp()
 * match) by scanning ctx->send_queues.
 * NOTE(review): the return statements are not visible; presumably the
 * matching queue, or NULL if there is none.
 */
459 static struct unix_dgram_send_queue *find_send_queue(
460 struct unix_dgram_ctx *ctx, const char *dst_sock)
462 struct unix_dgram_send_queue *s;
464 for (s = ctx->send_queues; s != NULL; s = s->next) {
465 if (strcmp(s->path, dst_sock) == 0) {
/*
 * Append a private copy of a message to send queue q.
 *
 * The copy lives in a single allocation laid out as
 *   [struct unix_dgram_msg][control buffer, if supported][payload]
 * and its embedded msghdr/iovec point into that allocation. The caller's
 * fds are duplicated with dup() and the duplicates are stored in an
 * SCM_RIGHTS control message, so the queued message does not depend on
 * the caller's descriptors. The finished message is added at the end of
 * q->msgs.
 *
 * q:            queue to append to
 * iov, iovlen:  payload; its total length comes from iov_buflen()
 * fds, num_fds: descriptors to pass; num_fds is checked against INT8_MAX
 *
 * A trailing path runs close_fd_array() over the duplicated fds.
 * NOTE(review): the conditions leading to the individual error paths and
 * the return values are not visible in this view.
 */
472 static int queue_msg(struct unix_dgram_send_queue *q,
473 const struct iovec *iov, int iovlen,
474 const int *fds, size_t num_fds)
476 struct unix_dgram_msg *msg;
479 size_t msglen = sizeof(struct unix_dgram_msg);
483 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
484 size_t fds_size = sizeof(int) * MIN(num_fds, INT8_MAX);
485 int fds_copy[MIN(num_fds, INT8_MAX)];
486 size_t cmsg_len = CMSG_LEN(fds_size);
487 size_t cmsg_space = CMSG_SPACE(fds_size);
491 * Note: No need to check for overflow here,
492 * since cmsg will store <= INT8_MAX fds.
494 msglen += cmsg_space;
496 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
498 if (num_fds > INT8_MAX) {
502 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
508 data_len = iov_buflen(iov, iovlen);
509 if (data_len == -1) {
/* Detect wrap-around of the total allocation size. */
513 tmp = msglen + data_len;
514 if ((tmp < msglen) || (tmp < data_len)) {
520 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
521 for (i = 0; i < num_fds; i++) {
/* The queued message gets its own duplicates of the caller's fds. */
525 for (i = 0; i < num_fds; i++) {
526 fds_copy[i] = dup(fds[i]);
527 if (fds_copy[i] == -1) {
534 msg = malloc(msglen);
/* Variable-size data starts right behind the struct. */
542 data_buf = (uint8_t *)(msg + 1);
544 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
546 cmsg_buf = (char *)data_buf;
547 memset(cmsg_buf, 0, cmsg_space);
548 data_buf += cmsg_space;
555 msg->iov = (struct iovec) {
556 .iov_base = (void *)data_buf,
560 msg->msg = (struct msghdr) {
561 .msg_iov = &msg->iov,
563 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
564 .msg_control = cmsg_buf,
565 .msg_controllen = cmsg_space,
569 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
571 struct cmsghdr *cmsg;
574 cmsg = CMSG_FIRSTHDR(&msg->msg);
575 cmsg->cmsg_level = SOL_SOCKET;
576 cmsg->cmsg_type = SCM_RIGHTS;
577 cmsg->cmsg_len = cmsg_len;
578 fdptr = CMSG_DATA(cmsg);
579 memcpy(fdptr, fds_copy, fds_size);
580 msg->msg.msg_controllen = cmsg->cmsg_len;
582 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
/* Presumably flattens the caller's iovec array into data_buf - confirm. */
584 iov_buf(iov, iovlen, data_buf, data_len);
586 DLIST_ADD_END(q->msgs, msg, struct unix_dgram_msg);
590 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
591 close_fd_array(fds_copy, num_fds);
/*
 * Thread pool job: send one queued datagram.
 *
 * private_data is the struct unix_dgram_msg to send. sendmsg() on
 * dmsg->sock is retried while it fails with EINTR; the result is stored
 * in dmsg->sent and, on failure, errno is saved in dmsg->sys_errno.
 */
596 static void unix_dgram_send_job(void *private_data)
598 struct unix_dgram_msg *dmsg = private_data;
601 dmsg->sent = sendmsg(dmsg->sock, &dmsg->msg, 0);
602 } while ((dmsg->sent == -1) && (errno == EINTR));
604 if (dmsg->sent == -1) {
605 dmsg->sys_errno = errno;
/*
 * Poll handler for the thread pool's signal fd.
 *
 * Collects one finished job id with pthreadpool_finished_jobs() and finds
 * the send queue whose socket number equals that id (jobs are submitted
 * with q->sock as job id). A message is unlinked from that queue and the
 * fds attached to it are closed. If more messages are waiting, the head
 * of the queue is submitted to the pool as the next job.
 * unix_dgram_send_queue_free() is also called from here.
 * NOTE(review): which message is unlinked, how the send result is
 * evaluated and the conditions leading to the queue being freed are not
 * visible in this view.
 */
609 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
612 struct unix_dgram_ctx *ctx = private_data;
613 struct unix_dgram_send_queue *q;
614 struct unix_dgram_msg *msg;
617 ret = pthreadpool_finished_jobs(ctx->send_pool, &job, 1);
622 for (q = ctx->send_queues; q != NULL; q = q->next) {
623 if (job == q->sock) {
629 /* Huh? Should not happen */
634 DLIST_REMOVE(q->msgs, msg);
635 close_fd_array_cmsg(&msg->msg);
638 if (q->msgs != NULL) {
639 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
640 unix_dgram_send_job, q->msgs);
646 unix_dgram_send_queue_free(q);
/*
 * Send one datagram (plus optional file descriptors) to dst.
 *
 * Flow visible here:
 *  - num_fds is checked against INT8_MAX and every fd is probed with
 *    lseek(); the ESPIPE result is what identifies the communication
 *    channels that the comments below allow for fd passing.
 *  - If a send queue for dst->sun_path already exists, the message is
 *    appended to it via queue_msg() so that ordering is preserved.
 *  - Otherwise a direct sendmsg() on ctx->sock is tried, with the fds in
 *    an SCM_RIGHTS control message where supported.
 *  - errno values such as EWOULDBLOCK and ENOBUFS are tested after that
 *    attempt; further down a new send queue is created, the message is
 *    queued and a thread pool job (job id q->sock) is started for the
 *    head of the queue. If queueing or job submission fails the new queue
 *    is freed again.
 *
 * ctx:          sending context
 * dst:          destination socket address
 * iov, iovlen:  payload
 * fds, num_fds: descriptors to pass along
 *
 * NOTE(review): cmsg_buf is a variable length array sized from num_fds,
 * and it is declared before the num_fds > INT8_MAX check - worth
 * confirming that oversized num_fds cannot cause excessive stack use.
 * NOTE(review): return values and the exact branch conditions are not
 * visible in this view.
 */
649 static int unix_dgram_send(struct unix_dgram_ctx *ctx,
650 const struct sockaddr_un *dst,
651 const struct iovec *iov, int iovlen,
652 const int *fds, size_t num_fds)
654 struct unix_dgram_send_queue *q;
656 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
657 struct cmsghdr *cmsg;
658 size_t fds_size = sizeof(int) * num_fds;
659 size_t cmsg_len = CMSG_LEN(fds_size);
660 size_t cmsg_space = CMSG_SPACE(fds_size);
661 char cmsg_buf[cmsg_space];
662 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
666 if (num_fds > INT8_MAX) {
670 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
674 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
676 for (i = 0; i < num_fds; i++) {
678 * Make sure we only allow fd passing
679 * for communication channels,
680 * e.g. sockets, pipes, fifos, ...
682 ret = lseek(fds[i], 0, SEEK_CUR);
683 if (ret == -1 && errno == ESPIPE) {
689 * Reject the message as we may need to call dup(),
690 * if we queue the message.
692 * That might result in unexpected behavior for the caller
693 * for files and broken posix locking.
699 * To preserve message ordering, we have to queue a message when
700 * others are waiting in line already.
702 q = find_send_queue(ctx, dst->sun_path);
704 return queue_msg(q, iov, iovlen, fds, num_fds);
708 * Try a cheap nonblocking send
711 msg = (struct msghdr) {
712 .msg_name = discard_const_p(struct sockaddr_un, dst),
713 .msg_namelen = sizeof(*dst),
714 .msg_iov = discard_const_p(struct iovec, iov),
717 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
721 memset(cmsg_buf, 0, cmsg_space);
723 msg.msg_control = cmsg_buf;
724 msg.msg_controllen = cmsg_space;
725 cmsg = CMSG_FIRSTHDR(&msg);
726 cmsg->cmsg_level = SOL_SOCKET;
727 cmsg->cmsg_type = SCM_RIGHTS;
728 cmsg->cmsg_len = cmsg_len;
729 fdptr = CMSG_DATA(cmsg);
730 memcpy(fdptr, fds, fds_size);
731 msg.msg_controllen = cmsg->cmsg_len;
733 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
/* Direct attempt on the context's own (non-blocking) socket. */
735 ret = sendmsg(ctx->sock, &msg, 0);
739 if ((errno != EWOULDBLOCK) &&
742 /* FreeBSD can give this for large messages */
743 (errno != ENOBUFS) &&
749 ret = unix_dgram_send_queue_init(ctx, dst, &q);
753 ret = queue_msg(q, iov, iovlen, fds, num_fds);
755 unix_dgram_send_queue_free(q);
758 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
759 unix_dgram_send_job, q->msgs);
761 unix_dgram_send_queue_free(q);
/*
 * Accessor used by unix_msg_send() to fill the sock field of
 * struct unix_msg_hdr.
 * NOTE(review): the body is not visible here; presumably it returns
 * ctx->sock - confirm.
 */
767 static int unix_dgram_sock(struct unix_dgram_ctx *ctx)
/*
 * Release a datagram context.
 *
 * Visible steps: ctx->send_queues is checked first (NOTE(review): the
 * outcome of that check is not visible; presumably a context with pending
 * queues is not freed). An existing thread pool is destroyed with
 * pthreadpool_destroy() and its watch is freed, then the socket read
 * watch is freed. Only when running in the process recorded in
 * ctx->created_pid is the "unlink" branch taken (see the comment below).
 * NOTE(review): closing the socket, freeing memory and the return values
 * are not visible in this view.
 */
772 static int unix_dgram_free(struct unix_dgram_ctx *ctx)
774 if (ctx->send_queues != NULL) {
778 if (ctx->send_pool != NULL) {
779 int ret = pthreadpool_destroy(ctx->send_pool);
783 ctx->ev_funcs->watch_free(ctx->pool_read_watch);
786 ctx->ev_funcs->watch_free(ctx->sock_read_watch);
788 if (getpid() == ctx->created_pid) {
789 /* If we created it, unlink. Otherwise someone else might
790 * still have it open */
801 * Every message starts with a uint64_t cookie.
803 * A value of 0 indicates a single-fragment message which is complete in
804 * itself. The data immediately follows the cookie.
806 * Every multi-fragment message has a cookie != 0 and starts with a cookie
807 * followed by a struct unix_msg_hdr and then the data. The pid and sock
808 * fields are used to assure uniqueness on the receiver side.
/*
 * Header that follows the cookie in every fragment of a multi-fragment
 * message. Code in this file uses its msglen, pid and sock members:
 * msglen sizes the reassembly buffer, pid and sock identify the sender.
 */
811 struct unix_msg_hdr {
/* List linkage; partially received messages hang off unix_msg_ctx.msgs. */
818 struct unix_msg *prev, *next;
/*
 * Context of the fragmenting message layer built on top of unix_dgram.
 */
827 struct unix_msg_ctx {
/* Underlying datagram context, created in unix_msg_init(). */
828 struct unix_dgram_ctx *dgram;
/* User callback; receives complete (reassembled) messages. */
832 void (*recv_callback)(struct unix_msg_ctx *ctx,
833 uint8_t *msg, size_t msg_len,
834 int *fds, size_t num_fds,
/* Multi-fragment messages that are still being reassembled. */
838 struct unix_msg *msgs;
/*
 * Forward declaration: unix_msg_init() passes this function to
 * unix_dgram_init() before its definition further down in the file.
 */
841 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
842 uint8_t *buf, size_t buflen,
843 int *fds, size_t num_fds,
/*
 * Create a message context bound to addr.
 *
 * Allocates the context, stores fragment_len and the user callback and
 * sets up the underlying datagram context with unix_dgram_init(), using
 * fragment_len as its maximum datagram size and unix_msg_recv() as its
 * receive callback.
 *
 * addr:          local socket address
 * ev_funcs:      event-loop hooks handed on to unix_dgram_init()
 * fragment_len:  maximum size of a single datagram
 * cookie:        initial cookie for multi-fragment messages
 *                (NOTE(review): where it is stored is not visible here)
 * recv_callback: called with every complete message
 * result:        out parameter for the new context
 *
 * NOTE(review): error handling and return values are not visible in this
 * view.
 */
846 int unix_msg_init(const struct sockaddr_un *addr,
847 const struct poll_funcs *ev_funcs,
848 size_t fragment_len, uint64_t cookie,
849 void (*recv_callback)(struct unix_msg_ctx *ctx,
850 uint8_t *msg, size_t msg_len,
851 int *fds, size_t num_fds,
854 struct unix_msg_ctx **result)
856 struct unix_msg_ctx *ctx;
859 ctx = malloc(sizeof(*ctx));
864 *ctx = (struct unix_msg_ctx) {
865 .fragment_len = fragment_len,
867 .recv_callback = recv_callback,
868 .private_data = private_data
871 ret = unix_dgram_init(addr, fragment_len, ev_funcs,
872 unix_msg_recv, ctx, &ctx->dgram);
/*
 * Send a message of arbitrary size to dst, fragmenting it if necessary.
 *
 * If the payload fits into one fragment together with a uint64_t cookie,
 * it goes out as a single datagram: a local cookie value followed by the
 * caller's iovecs (copied into iov_copy[1..]).
 *
 * Otherwise every fragment starts with ctx->cookie and a struct
 * unix_msg_hdr (iov_copy[0] and iov_copy[1]); the remaining entries of
 * iov_copy are filled from the caller's iovec array until the fragment
 * reaches ctx->fragment_len. Per the comment near the end of the loop,
 * only the last fragment carries the fd array.
 *
 * ctx:          message context
 * dst:          destination socket address
 * iov, iovlen:  payload
 * fds, num_fds: descriptors to pass; num_fds is checked against INT8_MAX
 *
 * NOTE(review): the single-fragment test subtracts sizeof(uint64_t) from
 * ctx->fragment_len without a visible lower-bound check - confirm that
 * fragment_len can never be smaller than that.
 * NOTE(review): the cookie update (only the ctx->cookie == 0 test is
 * visible), error handling and return values are not visible in this
 * view.
 */
882 int unix_msg_send(struct unix_msg_ctx *ctx, const struct sockaddr_un *dst,
883 const struct iovec *iov, int iovlen,
884 const int *fds, size_t num_fds)
/* Two extra slots: one for the cookie, one for the fragment header. */
889 struct iovec iov_copy[iovlen+2];
890 struct unix_msg_hdr hdr;
891 struct iovec src_iov;
897 msglen = iov_buflen(iov, iovlen);
902 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
906 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
908 if (num_fds > INT8_MAX) {
912 if (msglen <= (ctx->fragment_len - sizeof(uint64_t))) {
915 iov_copy[0].iov_base = &cookie;
916 iov_copy[0].iov_len = sizeof(cookie);
918 memcpy(&iov_copy[1], iov,
919 sizeof(struct iovec) * iovlen);
922 return unix_dgram_send(ctx->dgram, dst, iov_copy, iovlen+1,
926 hdr = (struct unix_msg_hdr) {
929 .sock = unix_dgram_sock(ctx->dgram)
932 iov_copy[0].iov_base = &ctx->cookie;
933 iov_copy[0].iov_len = sizeof(ctx->cookie);
934 iov_copy[1].iov_base = &hdr;
935 iov_copy[1].iov_len = sizeof(hdr);
941 * The following write loop sends the user message in pieces. We have
942 * filled the first two iovecs above with "cookie" and "hdr". In the
943 * following loops we pull message chunks from the user iov array and
944 * fill iov_copy piece by piece, possibly truncating chunks from the
945 * caller's iov array. Ugly, but hopefully efficient.
948 while (sent < msglen) {
950 size_t iov_index = 2;
952 fragment_len = sizeof(ctx->cookie) + sizeof(hdr);
954 while (fragment_len < ctx->fragment_len) {
957 space = ctx->fragment_len - fragment_len;
958 chunk = MIN(space, src_iov.iov_len);
960 iov_copy[iov_index].iov_base = src_iov.iov_base;
961 iov_copy[iov_index].iov_len = chunk;
964 src_iov.iov_base = (char *)src_iov.iov_base + chunk;
965 src_iov.iov_len -= chunk;
966 fragment_len += chunk;
968 if (src_iov.iov_len == 0) {
977 sent += (fragment_len - sizeof(ctx->cookie) - sizeof(hdr));
980 * only the last fragment should pass the fd array.
981 * That simplifies the receiver a lot.
984 ret = unix_dgram_send(ctx->dgram, dst,
988 ret = unix_dgram_send(ctx->dgram, dst,
998 if (ctx->cookie == 0) {
/*
 * Datagram-level receive callback: strip the framing and reassemble
 * multi-fragment messages.
 *
 * Every datagram must at least hold a uint64_t cookie, which is copied
 * out and skipped. One path hands the remaining bytes straight to the
 * user callback (per the format description in this file that is the
 * cookie == 0, single-fragment case; the test itself is not visible
 * here).
 *
 * For fragments, a struct unix_msg_hdr is copied out next. The list of
 * messages in progress is searched for an entry with the same sender pid
 * and sender socket; an entry whose cookie differs from the received one
 * is removed from the list. A new entry is allocated with room for
 * hdr.msglen bytes and added to the list. The fragment payload is
 * appended at msg->received after checking it against the remaining
 * space; once msg->received reaches msg->msglen the entry is unlinked
 * and the complete message is delivered together with fds.
 *
 * A trailing path runs close_fd_array() over fds.
 * NOTE(review): how the individual checks exit (presumably via that
 * trailing path) and where entries are freed is not visible in this view.
 * NOTE(review): the allocation size is taken from hdr.msglen, which comes
 * from the received datagram; no bound or overflow check is visible here
 * - confirm.
 */
1005 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
1006 uint8_t *buf, size_t buflen,
1007 int *fds, size_t num_fds,
1010 struct unix_msg_ctx *ctx = (struct unix_msg_ctx *)private_data;
1011 struct unix_msg_hdr hdr;
1012 struct unix_msg *msg;
1016 if (buflen < sizeof(cookie)) {
1020 memcpy(&cookie, buf, sizeof(cookie));
1022 buf += sizeof(cookie);
1023 buflen -= sizeof(cookie);
1026 ctx->recv_callback(ctx, buf, buflen, fds, num_fds, ctx->private_data);
1030 if (buflen < sizeof(hdr)) {
1033 memcpy(&hdr, buf, sizeof(hdr));
1036 buflen -= sizeof(hdr);
1038 for (msg = ctx->msgs; msg != NULL; msg = msg->next) {
1039 if ((msg->sender_pid == hdr.pid) &&
1040 (msg->sender_sock == hdr.sock)) {
1045 if ((msg != NULL) && (msg->cookie != cookie)) {
1046 DLIST_REMOVE(ctx->msgs, msg);
1052 msg = malloc(offsetof(struct unix_msg, buf) + hdr.msglen);
1056 *msg = (struct unix_msg) {
1057 .msglen = hdr.msglen,
1058 .sender_pid = hdr.pid,
1059 .sender_sock = hdr.sock,
1062 DLIST_ADD(ctx->msgs, msg);
1065 space = msg->msglen - msg->received;
1066 if (buflen > space) {
1070 memcpy(msg->buf + msg->received, buf, buflen);
1071 msg->received += buflen;
1073 if (msg->received < msg->msglen) {
1077 DLIST_REMOVE(ctx->msgs, msg);
1078 ctx->recv_callback(ctx, msg->buf, msg->msglen, fds, num_fds, ctx->private_data);
1083 close_fd_array(fds, num_fds);
/*
 * Release a message context: the underlying datagram context is freed
 * with unix_dgram_free(), then every message still being reassembled is
 * unlinked from ctx->msgs.
 * NOTE(review): the handling of a unix_dgram_free() failure, the freeing
 * of the unlinked messages and of ctx itself, and the return values are
 * not visible in this view.
 */
1086 int unix_msg_free(struct unix_msg_ctx *ctx)
1090 ret = unix_dgram_free(ctx->dgram);
1095 while (ctx->msgs != NULL) {
1096 struct unix_msg *msg = ctx->msgs;
1097 DLIST_REMOVE(ctx->msgs, msg);