2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
29 * This file implements two abstractions: The "unix_dgram" functions implement
30 * queueing for unix domain datagram sockets. You can send to a destination
31 * socket, and if that has no free space available, the message is queued
32 * and sent on an anonymous socket by a thread pool job. "unix_dgram" expects the
33 * data size not to exceed the system limit.
35 * The "unix_msg" functions implement the fragmentation of large messages on
36 * top of "unix_dgram". This is what is exposed to the user of this API.
/*
 * One outgoing datagram held in a send queue. Other code in this file reads
 * and writes its iov, fds, num_fds, sock, sent and sys_errno members.
 */
39 struct unix_dgram_msg {
/* List links used by the DLIST_* macros on a queue's msgs list. */
40 struct unix_dgram_msg *prev, *next;
/*
 * Queue of datagrams pending for one destination. Queues are chained on
 * unix_dgram_ctx.send_queues and matched by their path member in
 * find_send_queue().
 */
50 struct unix_dgram_send_queue {
/* List links used by the DLIST_* macros on ctx->send_queues. */
51 struct unix_dgram_send_queue *prev, *next;
/* Context whose send_queues list this queue is removed from when freed. */
52 struct unix_dgram_ctx *ctx;
/* Pending messages; queue_msg() appends at the tail. */
54 struct unix_dgram_msg *msgs;
/*
 * State of one bound unix datagram endpoint: the receive side (socket watch
 * and callback) and the send side (per-destination queues plus the thread
 * pool that runs queued sends).
 */
58 struct unix_dgram_ctx {
/* Event-loop hooks; this file calls watch_new, watch_update and watch_free. */
61 const struct poll_funcs *ev_funcs;
/* Invoked by unix_dgram_recv_handler() for each datagram read. */
64 void (*recv_callback)(struct unix_dgram_ctx *ctx,
65 uint8_t *msg, size_t msg_len,
66 int *fds, size_t num_fds,
/* POLLIN watch on the bound socket, created in unix_dgram_init(). */
70 struct poll_watch *sock_read_watch;
/* Head of the list of per-destination send queues. */
71 struct unix_dgram_send_queue *send_queues;
/* Thread pool, created on demand by unix_dgram_init_pthreadpool(). */
73 struct pthreadpool *send_pool;
/* POLLIN watch on the pool's signal fd; fires unix_dgram_job_finished(). */
74 struct poll_watch *pool_read_watch;
/*
 * Forward declarations: iov_buflen() sums the lengths of an iovec array and
 * is defined at the end of the file; unix_dgram_recv_handler() is the POLLIN
 * callback registered by unix_dgram_init().
 */
80 static ssize_t iov_buflen(const struct iovec *iov, int iovlen);
81 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
84 /* Set socket non blocking. */
/*
 * Put a socket into non-blocking mode.
 *
 * Reads the file status flags with F_GETFL, ORs in FLAG_TO_SET and writes
 * them back with F_SETFL. FLAG_TO_SET is one of O_NONBLOCK, O_NDELAY or
 * FNDELAY (presumably selected by preprocessor checks for what the platform
 * defines -- confirm).
 *
 * NOTE(review): the return convention is presumably 0 on success and an
 * errno value on failure -- confirm.
 */
85 static int prepare_socket_nonblock(int sock)
89 #define FLAG_TO_SET O_NONBLOCK
92 #define FLAG_TO_SET O_NDELAY
94 #define FLAG_TO_SET FNDELAY
/* Fetch the current file status flags so that only one bit is changed. */
98 flags = fcntl(sock, F_GETFL);
102 flags |= FLAG_TO_SET;
103 if (fcntl(sock, F_SETFL, flags) == -1) {
111 /* Set socket close on exec. */
/*
 * Mark a descriptor close-on-exec using the F_GETFD / F_SETFD fcntl pair.
 * Besides prepare_socket() it is called for the send-queue socket and for
 * every descriptor received in unix_dgram_recv_handler().
 *
 * NOTE(review): presumably FD_CLOEXEC is ORed into flags between the two
 * calls and the result is 0 or an errno value -- confirm.
 */
112 static int prepare_socket_cloexec(int sock)
117 flags = fcntl(sock, F_GETFD, 0);
122 if (fcntl(sock, F_SETFD, flags) == -1) {
129 /* Set socket non blocking and close on exec. */
/*
 * Apply both socket preparations: first prepare_socket_nonblock(), then
 * prepare_socket_cloexec(), whose result is returned.
 *
 * NOTE(review): presumably a failure of the first step is returned before
 * the second step runs -- confirm.
 */
130 static int prepare_socket(int sock)
132 int ret = prepare_socket_nonblock(sock);
137 return prepare_socket_cloexec(sock);
/*
 * Locate the file descriptors passed in the control data of a received
 * message.
 *
 * msg     - message header as filled in by recvmsg().
 * fds     - output: pointer to the descriptor array (presumably pointing
 *           into the control buffer, i.e. not a copy -- confirm).
 * num_fds - output: number of descriptors, derived from the payload size of
 *           the matching control message.
 *
 * The body is only compiled when HAVE_STRUCT_MSGHDR_MSG_CONTROL is defined.
 */
140 static void extract_fd_array_from_msghdr(struct msghdr *msg, int **fds,
143 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
144 struct cmsghdr *cmsg;
/* Walk all control messages attached to the datagram. */
146 for(cmsg = CMSG_FIRSTHDR(msg);
148 cmsg = CMSG_NXTHDR(msg, cmsg))
150 void *data = CMSG_DATA(cmsg);
/* Only SOL_SOCKET / SCM_RIGHTS entries carry descriptors. */
152 if (cmsg->cmsg_type != SCM_RIGHTS) {
155 if (cmsg->cmsg_level != SOL_SOCKET) {
/* Payload size is cmsg_len minus the header length CMSG_LEN(0). */
160 *num_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof (int);
/*
 * Iterate over an array of num_fds file descriptors.
 *
 * NOTE(review): going by the name and its use on error and cleanup paths,
 * each descriptor is presumably closed (and possibly reset) -- confirm.
 */
166 static void close_fd_array(int *fds, size_t num_fds)
170 for (i = 0; i < num_fds; i++) {
/*
 * Create a unix_dgram_ctx with a datagram socket bound to "addr".
 *
 * addr          - address whose sun_path is copied into the context and
 *                 which is passed to bind().
 * max_msg       - size of the receive buffer allocated for the context.
 * ev_funcs      - event-loop hooks; watch_new is used to watch the socket.
 * recv_callback - stored in the context and called for received datagrams.
 * result        - output parameter for the new context (presumably written
 *                 on success only -- confirm).
 *
 * NOTE(review): presumably returns 0 or an errno value -- confirm.
 */
180 static int unix_dgram_init(const struct sockaddr_un *addr, size_t max_msg,
181 const struct poll_funcs *ev_funcs,
182 void (*recv_callback)(struct unix_dgram_ctx *ctx,
183 uint8_t *msg, size_t msg_len,
184 int *fds, size_t num_fds,
187 struct unix_dgram_ctx **result)
189 struct unix_dgram_ctx *ctx;
/* Length of the socket path including the terminating NUL. */
194 pathlen = strlen(addr->sun_path)+1;
/* The path is stored inline at the end of the context allocation. */
199 ctx = malloc(offsetof(struct unix_dgram_ctx, path) + pathlen);
204 memcpy(ctx->path, addr->sun_path, pathlen);
209 *ctx = (struct unix_dgram_ctx) {
211 .ev_funcs = ev_funcs,
212 .recv_callback = recv_callback,
213 .private_data = private_data,
/* No creator pid yet; it is recorded with getpid() after bind(). */
214 .created_pid = (pid_t)-1
217 ctx->recv_buf = malloc(max_msg);
218 if (ctx->recv_buf == NULL) {
223 ctx->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
224 if (ctx->sock == -1) {
229 /* Set non-blocking and close-on-exec. */
230 ret = prepare_socket(ctx->sock);
236 ret = bind(ctx->sock,
237 (const struct sockaddr *)(const void *)addr,
/* unix_dgram_free() compares this pid with getpid() before unlinking. */
244 ctx->created_pid = getpid();
/* Have the event loop call unix_dgram_recv_handler() when readable. */
246 ctx->sock_read_watch = ctx->ev_funcs->watch_new(
247 ctx->ev_funcs, ctx->sock, POLLIN,
248 unix_dgram_recv_handler, ctx);
250 if (ctx->sock_read_watch == NULL) {
/*
 * Event-loop callback for the bound socket: read one datagram with
 * recvmsg() into ctx->recv_buf and hand it to ctx->recv_callback.
 *
 * w            - the watch that fired; set to 0 events on a socket error.
 * fd           - descriptor to read from.
 * private_data - the struct unix_dgram_ctx registered with the watch.
 *
 * Descriptors received as control data are extracted, marked close-on-exec
 * and passed to the callback along with the payload.
 */
267 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
270 struct unix_dgram_ctx *ctx = (struct unix_dgram_ctx *)private_data;
275 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
/* Control buffer with room for INT8_MAX descriptors. */
276 char buf[CMSG_SPACE(sizeof(int)*INT8_MAX)] = { 0, };
277 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
279 size_t i, num_fds = 0;
281 iov = (struct iovec) {
282 .iov_base = (void *)ctx->recv_buf,
283 .iov_len = ctx->max_msg,
286 msg = (struct msghdr) {
289 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
291 .msg_controllen = sizeof(buf),
/* Where available, ask the kernel to set close-on-exec on received fds. */
295 #ifdef MSG_CMSG_CLOEXEC
296 flags |= MSG_CMSG_CLOEXEC;
299 received = recvmsg(fd, &msg, flags);
300 if (received == -1) {
301 if ((errno == EAGAIN) ||
302 (errno == EWOULDBLOCK) ||
303 (errno == EINTR) || (errno == ENOMEM)) {
304 /* Not really an error - just try again. */
307 /* Problem with the socket. Set it unreadable. */
308 ctx->ev_funcs->watch_update(w, 0);
311 if (received > ctx->max_msg) {
312 /* More than we expected, not for us */
316 extract_fd_array_from_msghdr(&msg, &fds, &num_fds);
/* Set close-on-exec on every received descriptor individually. */
318 for (i = 0; i < num_fds; i++) {
321 err = prepare_socket_cloexec(fds[i]);
327 ctx->recv_callback(ctx, ctx->recv_buf, received,
328 fds, num_fds, ctx->private_data);
/*
 * NOTE(review): the close_fd_array() plus callback-without-fds sequence
 * below is presumably the path taken when prepare_socket_cloexec() failed
 * -- confirm.
 */
332 close_fd_array(fds, num_fds);
334 ctx->recv_callback(ctx, ctx->recv_buf, received,
335 NULL, 0, ctx->private_data);
/* Forward declaration; registered as watch callback in unix_dgram_init_pthreadpool(). */
338 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
/*
 * Set up ctx->send_pool and a POLLIN watch on the pool's signal fd so that
 * unix_dgram_job_finished() runs when a send job completes.
 *
 * If the watch cannot be created the pool is destroyed again and
 * ctx->send_pool reset to NULL.
 *
 * NOTE(review): when ctx->send_pool is already set the function presumably
 * returns success right away; return value presumably 0 or errno -- confirm.
 */
341 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx *ctx)
345 if (ctx->send_pool != NULL) {
349 ret = pthreadpool_init(0, &ctx->send_pool);
354 signalfd = pthreadpool_signal_fd(ctx->send_pool);
356 ctx->pool_read_watch = ctx->ev_funcs->watch_new(
357 ctx->ev_funcs, signalfd, POLLIN,
358 unix_dgram_job_finished, ctx);
359 if (ctx->pool_read_watch == NULL) {
/* Undo the pool creation so a later call starts from scratch. */
360 pthreadpool_destroy(ctx->send_pool);
361 ctx->send_pool = NULL;
/*
 * Create a send queue for destination "dst" and link it into
 * ctx->send_queues.
 *
 * The queue gets its own AF_UNIX datagram socket, marked close-on-exec and
 * connected to dst; the thread pool is set up via
 * unix_dgram_init_pthreadpool() before the queue is added to the list.
 *
 * result - output parameter for the new queue (presumably set on success
 *          only -- confirm).
 *
 * NOTE(review): presumably returns 0 or an errno value -- confirm.
 */
368 static int unix_dgram_send_queue_init(
369 struct unix_dgram_ctx *ctx, const struct sockaddr_un *dst,
370 struct unix_dgram_send_queue **result)
372 struct unix_dgram_send_queue *q;
/* Destination path length including the terminating NUL. */
376 pathlen = strlen(dst->sun_path)+1;
/* The path is stored inline at the end of the queue allocation. */
378 q = malloc(offsetof(struct unix_dgram_send_queue, path) + pathlen);
384 memcpy(q->path, dst->sun_path, pathlen);
386 q->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
/* Only close-on-exec is set here; prepare_socket_nonblock() is not called. */
392 err = prepare_socket_cloexec(q->sock);
398 ret = connect(q->sock,
399 (const struct sockaddr *)(const void *)dst,
/* Retry connect() when interrupted by a signal. */
401 } while ((ret == -1) && (errno == EINTR));
408 err = unix_dgram_init_pthreadpool(ctx);
413 DLIST_ADD(ctx->send_queues, q);
/*
 * Tear down a send queue: every pending message is unlinked and its
 * descriptor array handed to close_fd_array(), then the queue is removed
 * from its context's send_queues list.
 *
 * NOTE(review): freeing of the messages, the queue itself and its socket is
 * presumably done on the remaining lines of the body -- confirm.
 */
425 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue *q)
427 struct unix_dgram_ctx *ctx = q->ctx;
429 while (q->msgs != NULL) {
430 struct unix_dgram_msg *msg;
432 DLIST_REMOVE(q->msgs, msg);
433 close_fd_array(msg->fds, msg->num_fds);
437 DLIST_REMOVE(ctx->send_queues, q);
/*
 * Search ctx->send_queues for the queue whose path equals dst_sock
 * (exact strcmp match).
 *
 * NOTE(review): presumably returns the matching queue, or NULL when there is
 * none -- confirm.
 */
441 static struct unix_dgram_send_queue *find_send_queue(
442 struct unix_dgram_ctx *ctx, const char *dst_sock)
444 struct unix_dgram_send_queue *s;
446 for (s = ctx->send_queues; s != NULL; s = s->next) {
447 if (strcmp(s->path, dst_sock) == 0) {
/*
 * Copy a message (payload and descriptors) and append it to send queue q.
 *
 * q           - queue to append to.
 * iov, iovlen - payload; flattened into one buffer that is placed directly
 *               behind the struct unix_dgram_msg.
 * fds, num_fds - descriptors to pass; at most INT8_MAX. Each one is
 *               duplicated with dup() and the copies are stored behind the
 *               payload, after alignment padding.
 *
 * The allocation size is accumulated step by step and every addition is
 * checked for wrap-around.
 *
 * NOTE(review): presumably returns 0 or an errno value -- confirm.
 */
454 static int queue_msg(struct unix_dgram_send_queue *q,
455 const struct iovec *iov, int iovlen,
456 const int *fds, size_t num_fds)
458 struct unix_dgram_msg *msg;
462 size_t fds_size = sizeof(int) * num_fds;
/*
 * NOTE(review): for num_fds == 0 this declares a variable length array with
 * zero elements, which ISO C does not permit (the size must be > 0).
 */
463 int fds_copy[MIN(num_fds, INT8_MAX)];
464 size_t fds_padding = 0;
469 if (num_fds > INT8_MAX) {
473 msglen = sizeof(struct unix_dgram_msg);
/* iov_buflen() reports -1 when the summed length overflows. */
475 data_len = iov_buflen(iov, iovlen);
476 if (data_len == -1) {
480 tmp = msglen + data_len;
481 if ((tmp < msglen) || (tmp < data_len)) {
/* Reserve up to sizeof(int) - 1 bytes so the fd array can be aligned. */
488 const size_t fds_align = sizeof(int) - 1;
490 tmp = msglen + fds_align;
491 if ((tmp < msglen) || (tmp < fds_align)) {
497 fds_padding = tmp - msglen;
500 tmp = msglen + fds_size;
501 if ((tmp < msglen) || (tmp < fds_size)) {
508 for (i = 0; i < num_fds; i++) {
/* Duplicate the descriptors; the queued message uses the copies. */
512 for (i = 0; i < num_fds; i++) {
513 fds_copy[i] = dup(fds[i]);
514 if (fds_copy[i] == -1) {
520 msg = malloc(msglen);
/* The payload buffer starts right behind the message struct. */
528 data_buf = (uint8_t *)(msg + 1);
530 msg->iov = (struct iovec) {
531 .iov_base = (void *)data_buf,
/* Flatten the iovec array into the contiguous payload buffer. */
535 for (i=0; i<iovlen; i++) {
536 memcpy(data_buf, iov[i].iov_base, iov[i].iov_len);
537 data_buf += iov[i].iov_len;
540 msg->num_fds = num_fds;
541 if (msg->num_fds > 0) {
/* The fd copies live behind the payload, after the padding. */
543 data_buf += fds_padding;
544 fds_ptr= (void *)data_buf;
545 memcpy(fds_ptr, fds_copy, fds_size);
546 msg->fds = (int *)fds_ptr;
551 DLIST_ADD_END(q->msgs, msg, struct unix_dgram_msg);
/* NOTE(review): presumably the failure path, dropping the dup()ed copies. */
555 close_fd_array(fds_copy, num_fds);
/*
 * Job function handed to pthreadpool_add_job(): send one queued message.
 *
 * private_data - the struct unix_dgram_msg to send.
 *
 * Builds a msghdr around dmsg->iov, attaches the descriptors as an
 * SCM_RIGHTS control message when there are any, and calls sendmsg() on
 * dmsg->sock, retrying on EINTR. The result is stored in dmsg->sent and, on
 * failure, errno in dmsg->sys_errno.
 */
559 static void unix_dgram_send_job(void *private_data)
561 struct unix_dgram_msg *dmsg = private_data;
562 struct msghdr msg = {
563 .msg_iov = &dmsg->iov,
566 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
567 struct cmsghdr *cmsg;
568 size_t fds_size = sizeof(int) * dmsg->num_fds;
569 size_t cmsg_len = CMSG_LEN(fds_size);
570 size_t cmsg_space = CMSG_SPACE(fds_size);
/* Stack buffer sized for exactly one control message holding the fds. */
571 char cmsg_buf[cmsg_space];
573 if (dmsg->num_fds > 0) {
576 memset(cmsg_buf, 0, cmsg_space);
578 msg.msg_control = cmsg_buf;
579 msg.msg_controllen = cmsg_space;
580 cmsg = CMSG_FIRSTHDR(&msg);
581 cmsg->cmsg_level = SOL_SOCKET;
582 cmsg->cmsg_type = SCM_RIGHTS;
583 cmsg->cmsg_len = cmsg_len;
584 fdptr = CMSG_DATA(cmsg);
585 memcpy(fdptr, dmsg->fds, fds_size);
/* Shrink controllen from the padded space to the actual message length. */
586 msg.msg_controllen = cmsg->cmsg_len;
588 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
591 dmsg->sent = sendmsg(dmsg->sock, &msg, 0);
592 } while ((dmsg->sent == -1) && (errno == EINTR));
594 if (dmsg->sent == -1) {
/* Preserve errno in the message for whoever inspects the result. */
595 dmsg->sys_errno = errno;
/*
 * Watch callback for the thread pool's signal fd: reap one finished send
 * job and continue with its queue.
 *
 * private_data - the struct unix_dgram_ctx owning the pool.
 *
 * Jobs are submitted with the queue's socket as job id, so the finished job
 * is mapped back to its queue by comparing the id with q->sock. A message is
 * then unlinked from that queue and its descriptors released; if more
 * messages are pending a job is submitted for the new head of the queue.
 *
 * NOTE(review): the queue is presumably freed when it became empty or when
 * submitting the next job failed -- confirm.
 */
599 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
602 struct unix_dgram_ctx *ctx = private_data;
603 struct unix_dgram_send_queue *q;
604 struct unix_dgram_msg *msg;
/* Collect at most one finished job id per invocation. */
607 ret = pthreadpool_finished_jobs(ctx->send_pool, &job, 1);
612 for (q = ctx->send_queues; q != NULL; q = q->next) {
613 if (job == q->sock) {
619 /* Huh? Should not happen */
624 DLIST_REMOVE(q->msgs, msg);
625 close_fd_array(msg->fds, msg->num_fds);
628 if (q->msgs != NULL) {
629 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
630 unix_dgram_send_job, q->msgs);
636 unix_dgram_send_queue_free(q);
/*
 * Send one datagram, optionally with file descriptors, to "dst".
 *
 * ctx          - sending context; its own socket is used for the direct
 *                attempt.
 * dst          - destination address; dst->sun_path selects the send queue.
 * iov, iovlen  - payload.
 * fds, num_fds - descriptors to pass, at most INT8_MAX. Each is probed with
 *                lseek(); the comments in the body state that only
 *                communication channels (sockets, pipes, fifos) may be
 *                passed because queueing may dup() them.
 *
 * If a queue already exists for the destination the message is appended to
 * it to keep ordering. Otherwise a direct sendmsg() on ctx->sock is tried;
 * if that fails with EWOULDBLOCK, EAGAIN or EINTR a new queue is created,
 * the message queued and a thread pool job submitted for it.
 *
 * NOTE(review): presumably returns 0 or an errno value -- confirm.
 */
639 static int unix_dgram_send(struct unix_dgram_ctx *ctx,
640 const struct sockaddr_un *dst,
641 const struct iovec *iov, int iovlen,
642 const int *fds, size_t num_fds)
644 struct unix_dgram_send_queue *q;
646 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
647 struct cmsghdr *cmsg;
648 size_t fds_size = sizeof(int) * num_fds;
649 size_t cmsg_len = CMSG_LEN(fds_size);
650 size_t cmsg_space = CMSG_SPACE(fds_size);
/*
 * NOTE(review): this stack buffer is sized from num_fds before the
 * INT8_MAX limit below is checked.
 */
651 char cmsg_buf[cmsg_space];
652 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
656 if (num_fds > INT8_MAX) {
660 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
664 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
666 for (i = 0; i < num_fds; i++) {
668 * Make sure we only allow fd passing
669 * for communication channels,
670 * e.g. sockets, pipes, fifos, ...
/* lseek() failing with ESPIPE identifies a non-seekable descriptor. */
672 ret = lseek(fds[i], 0, SEEK_CUR);
673 if (ret == -1 && errno == ESPIPE) {
679 * Reject the message as we may need to call dup(),
680 * if we queue the message.
682 * That might result in unexpected behavior for the caller
683 * for files and broken posix locking.
689 * To preserve message ordering, we have to queue a message when
690 * others are waiting in line already.
692 q = find_send_queue(ctx, dst->sun_path);
694 return queue_msg(q, iov, iovlen, fds, num_fds);
698 * Try a cheap nonblocking send
701 msg = (struct msghdr) {
702 .msg_name = discard_const_p(struct sockaddr_un, dst),
703 .msg_namelen = sizeof(*dst),
704 .msg_iov = discard_const_p(struct iovec, iov),
707 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
711 memset(cmsg_buf, 0, cmsg_space);
713 msg.msg_control = cmsg_buf;
714 msg.msg_controllen = cmsg_space;
715 cmsg = CMSG_FIRSTHDR(&msg);
716 cmsg->cmsg_level = SOL_SOCKET;
717 cmsg->cmsg_type = SCM_RIGHTS;
718 cmsg->cmsg_len = cmsg_len;
719 fdptr = CMSG_DATA(cmsg);
720 memcpy(fdptr, fds, fds_size);
/* Shrink controllen from the padded space to the actual message length. */
721 msg.msg_controllen = cmsg->cmsg_len;
723 #endif /* HAVE_STRUCT_MSGHDR_MSG_CONTROL */
/* ctx->sock was made non-blocking by prepare_socket() in unix_dgram_init(). */
725 ret = sendmsg(ctx->sock, &msg, 0);
/* Only "would block"/interrupted failures lead to the queued path below. */
729 if ((errno != EWOULDBLOCK) && (errno != EAGAIN) && (errno != EINTR)) {
733 ret = unix_dgram_send_queue_init(ctx, dst, &q);
737 ret = queue_msg(q, iov, iovlen, fds, num_fds);
739 unix_dgram_send_queue_free(q);
/* The queue socket doubles as job id; see unix_dgram_job_finished(). */
742 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
743 unix_dgram_send_job, q->msgs);
745 unix_dgram_send_queue_free(q);
/*
 * Accessor used by unix_msg_send() to fill the "sock" field of the fragment
 * header.
 *
 * NOTE(review): presumably returns ctx->sock -- confirm.
 */
751 static int unix_dgram_sock(struct unix_dgram_ctx *ctx)
/*
 * Release a unix_dgram_ctx: destroy the send thread pool if one exists, free
 * the pool and socket watches, and handle the socket path depending on
 * whether the calling process is the one recorded in created_pid.
 *
 * NOTE(review): while send queues still exist the function presumably
 * refuses to free the context; return value presumably 0 or an errno value
 * -- confirm.
 */
756 static int unix_dgram_free(struct unix_dgram_ctx *ctx)
758 if (ctx->send_queues != NULL) {
762 if (ctx->send_pool != NULL) {
763 int ret = pthreadpool_destroy(ctx->send_pool);
767 ctx->ev_funcs->watch_free(ctx->pool_read_watch);
770 ctx->ev_funcs->watch_free(ctx->sock_read_watch);
772 if (getpid() == ctx->created_pid) {
773 /* If we created it, unlink. Otherwise someone else might
774 * still have it open */
785 * Every message starts with a uint64_t cookie.
787 * A value of 0 indicates a single-fragment message which is complete in
788 * itself. The data immediately follows the cookie.
790 * Every multi-fragment message has a cookie != 0 and starts with a cookie
791 * followed by a struct unix_msg_hdr and then the data. The pid and sock
792 * fields are used to assure uniqueness on the receiver side.
/*
 * Header that follows the cookie in each fragment of a multi-fragment
 * message. unix_msg_recv() reads its msglen, pid and sock members.
 */
795 struct unix_msg_hdr {
/*
 * List links of a partially reassembled message; such messages are kept on
 * unix_msg_ctx.msgs via the DLIST_* macros until all bytes have arrived.
 */
802 struct unix_msg *prev, *next;
/*
 * Public messaging context: wraps a unix_dgram_ctx and adds fragmentation
 * on send and reassembly on receive.
 */
811 struct unix_msg_ctx {
/* Underlying datagram endpoint created in unix_msg_init(). */
812 struct unix_dgram_ctx *dgram;
/* User callback, invoked with complete (reassembled) messages. */
816 void (*recv_callback)(struct unix_msg_ctx *ctx,
817 uint8_t *msg, size_t msg_len,
818 int *fds, size_t num_fds,
/* Messages whose fragments have not all arrived yet. */
822 struct unix_msg *msgs;
/* Forward declaration; passed to unix_dgram_init() as receive callback. */
825 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
826 uint8_t *buf, size_t buflen,
827 int *fds, size_t num_fds,
/*
 * Create a messaging context listening on "addr".
 *
 * addr          - socket address passed on to unix_dgram_init().
 * ev_funcs      - event-loop hooks passed on to unix_dgram_init().
 * fragment_len  - stored in the context and also used as the maximum
 *                 datagram size of the underlying unix_dgram layer.
 * cookie        - initial cookie (presumably stored in the context and used
 *                 to tag multi-fragment messages -- confirm).
 * recv_callback - called with every complete message.
 * result        - output parameter for the new context (presumably set on
 *                 success only -- confirm).
 *
 * NOTE(review): presumably returns 0 or an errno value -- confirm.
 */
830 int unix_msg_init(const struct sockaddr_un *addr,
831 const struct poll_funcs *ev_funcs,
832 size_t fragment_len, uint64_t cookie,
833 void (*recv_callback)(struct unix_msg_ctx *ctx,
834 uint8_t *msg, size_t msg_len,
835 int *fds, size_t num_fds,
838 struct unix_msg_ctx **result)
840 struct unix_msg_ctx *ctx;
843 ctx = malloc(sizeof(*ctx));
848 *ctx = (struct unix_msg_ctx) {
849 .fragment_len = fragment_len,
851 .recv_callback = recv_callback,
852 .private_data = private_data
/* The dgram layer delivers raw fragments to unix_msg_recv() with ctx. */
855 ret = unix_dgram_init(addr, fragment_len, ev_funcs,
856 unix_msg_recv, ctx, &ctx->dgram);
/*
 * Send a message of arbitrary size to "dst", fragmenting it if necessary.
 *
 * ctx          - messaging context.
 * dst          - destination socket address.
 * iov, iovlen  - message payload.
 * fds, num_fds - descriptors to pass along, at most INT8_MAX.
 *
 * A message that fits into one fragment together with a uint64_t cookie is
 * sent as a single datagram: cookie first, then the caller's iovecs.
 * Larger messages are sent as a series of datagrams, each starting with
 * ctx->cookie and a struct unix_msg_hdr, followed by as much payload as
 * fits into ctx->fragment_len. Only the last fragment carries the fd array.
 *
 * NOTE(review): presumably returns 0 or an errno value -- confirm.
 */
866 int unix_msg_send(struct unix_msg_ctx *ctx, const struct sockaddr_un *dst,
867 const struct iovec *iov, int iovlen,
868 const int *fds, size_t num_fds)
/* Two extra slots: one for the cookie, one for the fragment header. */
873 struct iovec iov_copy[iovlen+2];
874 struct unix_msg_hdr hdr;
875 struct iovec src_iov;
881 msglen = iov_buflen(iov, iovlen);
886 #ifndef HAVE_STRUCT_MSGHDR_MSG_CONTROL
890 #endif /* ! HAVE_STRUCT_MSGHDR_MSG_CONTROL */
892 if (num_fds > INT8_MAX) {
/*
 * NOTE(review): the subtraction is unsigned and would wrap if
 * ctx->fragment_len were smaller than sizeof(uint64_t) -- confirm that this
 * is excluded elsewhere.
 */
896 if (msglen <= (ctx->fragment_len - sizeof(uint64_t))) {
899 iov_copy[0].iov_base = &cookie;
900 iov_copy[0].iov_len = sizeof(cookie);
902 memcpy(&iov_copy[1], iov,
903 sizeof(struct iovec) * iovlen);
906 return unix_dgram_send(ctx->dgram, dst, iov_copy, iovlen+1,
910 hdr = (struct unix_msg_hdr) {
913 .sock = unix_dgram_sock(ctx->dgram)
916 iov_copy[0].iov_base = &ctx->cookie;
917 iov_copy[0].iov_len = sizeof(ctx->cookie);
918 iov_copy[1].iov_base = &hdr;
919 iov_copy[1].iov_len = sizeof(hdr);
925 * The following write loop sends the user message in pieces. We have
926 * filled the first two iovecs above with "cookie" and "hdr". In the
927 * following loops we pull message chunks from the user iov array and
928 * fill iov_copy piece by piece, possibly truncating chunks from the
929 * caller's iov array. Ugly, but hopefully efficient.
932 while (sent < msglen) {
/* Slots 0 and 1 hold cookie and header; payload starts at slot 2. */
934 size_t iov_index = 2;
936 fragment_len = sizeof(ctx->cookie) + sizeof(hdr);
938 while (fragment_len < ctx->fragment_len) {
941 space = ctx->fragment_len - fragment_len;
942 chunk = MIN(space, src_iov.iov_len);
944 iov_copy[iov_index].iov_base = src_iov.iov_base;
945 iov_copy[iov_index].iov_len = chunk;
/* Advance within the current source iovec by what was consumed. */
948 src_iov.iov_base = (char *)src_iov.iov_base + chunk;
949 src_iov.iov_len -= chunk;
950 fragment_len += chunk;
952 if (src_iov.iov_len == 0) {
/* Count only payload bytes, not the per-fragment cookie and header. */
961 sent += (fragment_len - sizeof(ctx->cookie) - sizeof(hdr));
964 * only the last fragment should pass the fd array.
965 * That simplifies the receiver a lot.
968 ret = unix_dgram_send(ctx->dgram, dst,
972 ret = unix_dgram_send(ctx->dgram, dst,
/*
 * A cookie of 0 marks single-fragment messages on the wire, so it must not
 * be used for fragmented ones (presumably the cookie is advanced just above
 * and bumped again here -- confirm).
 */
982 if (ctx->cookie == 0) {
/*
 * Receive callback of the dgram layer: interpret one datagram as a complete
 * message or as a fragment and reassemble fragments into full messages.
 *
 * buf, buflen  - received datagram, starting with the uint64_t cookie.
 * fds, num_fds - descriptors received with this datagram.
 * private_data - the struct unix_msg_ctx given to unix_dgram_init().
 *
 * Datagrams shorter than the cookie (or, for fragments, shorter than the
 * header) are not processed further. For fragments, the in-progress message
 * of the same sender (pid and sock from the header) is looked up; a stored
 * message with a different cookie is unlinked. The payload is appended and
 * the user callback invoked once msglen bytes have been received.
 */
989 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
990 uint8_t *buf, size_t buflen,
991 int *fds, size_t num_fds,
994 struct unix_msg_ctx *ctx = (struct unix_msg_ctx *)private_data;
995 struct unix_msg_hdr hdr;
996 struct unix_msg *msg;
1000 if (buflen < sizeof(cookie)) {
/* Copy out via memcpy; the buffer is treated as raw bytes. */
1004 memcpy(&cookie, buf, sizeof(cookie));
1006 buf += sizeof(cookie);
1007 buflen -= sizeof(cookie);
/* Presumably the cookie == 0 case: deliver the single fragment directly. */
1010 ctx->recv_callback(ctx, buf, buflen, fds, num_fds, ctx->private_data);
1014 if (buflen < sizeof(hdr)) {
1017 memcpy(&hdr, buf, sizeof(hdr));
1020 buflen -= sizeof(hdr);
/* Look for a partially received message from the same sender. */
1022 for (msg = ctx->msgs; msg != NULL; msg = msg->next) {
1023 if ((msg->sender_pid == hdr.pid) &&
1024 (msg->sender_sock == hdr.sock)) {
/* A different cookie means the sender started a new message. */
1029 if ((msg != NULL) && (msg->cookie != cookie)) {
1030 DLIST_REMOVE(ctx->msgs, msg);
/*
 * NOTE(review): hdr.msglen comes from the received header; no overflow or
 * upper-bound check on this size computation is visible here -- confirm.
 */
1036 msg = malloc(offsetof(struct unix_msg, buf) + hdr.msglen);
1040 *msg = (struct unix_msg) {
1041 .msglen = hdr.msglen,
1042 .sender_pid = hdr.pid,
1043 .sender_sock = hdr.sock,
1046 DLIST_ADD(ctx->msgs, msg);
/* Never copy more than the bytes still missing from the message. */
1049 space = msg->msglen - msg->received;
1050 if (buflen > space) {
1054 memcpy(msg->buf + msg->received, buf, buflen);
1055 msg->received += buflen;
1057 if (msg->received < msg->msglen) {
/* Message complete: unlink it and hand it to the user callback. */
1061 DLIST_REMOVE(ctx->msgs, msg);
1062 ctx->recv_callback(ctx, msg->buf, msg->msglen, fds, num_fds, ctx->private_data);
/* Presumably the common exit for datagrams whose fds are not delivered. */
1067 close_fd_array(fds, num_fds);
/*
 * Release a messaging context: free the underlying dgram context first,
 * then unlink all partially received messages from ctx->msgs.
 *
 * NOTE(review): presumably a failure of unix_dgram_free() is returned
 * without touching the context, and the unlinked messages and the context
 * are freed afterwards -- confirm.
 */
1070 int unix_msg_free(struct unix_msg_ctx *ctx)
1074 ret = unix_dgram_free(ctx->dgram);
1079 while (ctx->msgs != NULL) {
1080 struct unix_msg *msg = ctx->msgs;
1081 DLIST_REMOVE(ctx->msgs, msg);
1089 static ssize_t iov_buflen(const struct iovec *iov, int iovlen)
1094 for (i=0; i<iovlen; i++) {
1095 size_t thislen = iov[i].iov_len;
1096 size_t tmp = buflen + thislen;
1098 if ((tmp < buflen) || (tmp < thislen)) {