2 * Unix SMB/CIFS implementation.
3 * Copyright (C) Volker Lendecke 2013
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/select.h"
22 #include "system/time.h"
23 #include "system/network.h"
24 #include "dlinklist.h"
25 #include "pthreadpool/pthreadpool.h"
29 * This file implements two abstractions: The "unix_dgram" functions implement
30 * queueing for unix domain datagram sockets. You can send to a destination
31 * socket, and if that has no free space available, it will fall back to an
32 * anonymous socket that will poll for writability. "unix_dgram" expects the
33 * data size not to exceed the system limit.
35 * The "unix_msg" functions implement the fragmentation of large messages on
36 * top of "unix_dgram". This is what is exposed to the user of this API.
/*
 * One queued datagram waiting to be sent; linked into a
 * unix_dgram_send_queue's "msgs" list via the DLIST macros (prev/next).
 * NOTE(review): remaining fields (destination sock, send result, buffer
 * length and trailing payload buffer) are elided from this view — confirm
 * against the full file.
 */
39 struct unix_dgram_msg {
40 struct unix_dgram_msg *prev, *next;
/*
 * Per-destination queue of pending datagrams. One queue exists per
 * destination path on which a nonblocking send has failed; queues are
 * linked into ctx->send_queues and looked up by path (see find_send_queue).
 */
49 struct unix_dgram_send_queue {
50 struct unix_dgram_send_queue *prev, *next;
/* Owning context; used by unix_dgram_send_queue_free to unlink. */
51 struct unix_dgram_ctx *ctx;
/* FIFO of queued messages; head is in flight on the thread pool. */
53 struct unix_dgram_msg *msgs;
/*
 * State for one unix domain datagram endpoint: the bound receive socket,
 * the event-loop plumbing, and the lazily created send thread pool used
 * when a destination socket is not immediately writable.
 */
57 struct unix_dgram_ctx {
/* Event-loop abstraction supplied by the caller at init time. */
60 const struct poll_funcs *ev_funcs;
/* Invoked from unix_dgram_recv_handler with each received datagram. */
63 void (*recv_callback)(struct unix_dgram_ctx *ctx,
64 uint8_t *msg, size_t msg_len,
/* POLLIN watch on the receive socket. */
68 struct poll_watch *sock_read_watch;
/* List of per-destination queues for blocked sends. */
69 struct unix_dgram_send_queue *send_queues;
/* Created on demand by unix_dgram_init_pthreadpool. */
71 struct pthreadpool *send_pool;
/* Watch on the pool's signal fd; fires unix_dgram_job_finished. */
72 struct poll_watch *pool_read_watch;
/* Forward declarations for helpers defined later in this file. */
78 static ssize_t iov_buflen(const struct iovec *iov, int iovlen);
79 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
/*
 * Put "sock" into non-blocking mode using whichever flag this platform
 * provides (O_NONBLOCK preferred, falling back to O_NDELAY / FNDELAY).
 *
 * Returns 0 on success, an errno value on fcntl failure.
 */
static int prepare_socket_nonblock(int sock)
{
	int fl;
#ifdef O_NONBLOCK
#define FLAG_TO_SET O_NONBLOCK
#else
#ifdef SYSV
#define FLAG_TO_SET O_NDELAY
#else /* BSD */
#define FLAG_TO_SET FNDELAY
#endif
#endif

	fl = fcntl(sock, F_GETFL);
	if (fl == -1) {
		return errno;
	}
	fl |= FLAG_TO_SET;
	if (fcntl(sock, F_SETFL, fl) == -1) {
		return errno;
	}
#undef FLAG_TO_SET
	return 0;
}
/*
 * Mark "sock" close-on-exec so child processes do not inherit it.
 * A no-op on platforms without FD_CLOEXEC.
 *
 * Returns 0 on success, an errno value on fcntl failure.
 */
static int prepare_socket_cloexec(int sock)
{
#ifdef FD_CLOEXEC
	int fdflags = fcntl(sock, F_GETFD, 0);

	if (fdflags == -1) {
		return errno;
	}
	fdflags |= FD_CLOEXEC;
	if (fcntl(sock, F_SETFD, fdflags) == -1) {
		return errno;
	}
#endif
	return 0;
}
/*
 * Apply both socket preparations: non-blocking first, then close-on-exec.
 * Returns 0 on success, otherwise the errno value of the failing step.
 */
static int prepare_socket(int sock)
{
	int status = prepare_socket_nonblock(sock);

	if (status != 0) {
		return status;
	}
	return prepare_socket_cloexec(sock);
}
/*
 * Create a unix datagram endpoint. Allocates the context (with the socket
 * path stored inline after the struct), creates and binds an AF_UNIX
 * SOCK_DGRAM socket, and registers a POLLIN watch that dispatches received
 * datagrams to recv_callback. On success *result holds the new context.
 * Returns 0 or an errno value (error paths are elided from this view).
 */
138 static int unix_dgram_init(const struct sockaddr_un *addr, size_t max_msg,
139 const struct poll_funcs *ev_funcs,
140 void (*recv_callback)(struct unix_dgram_ctx *ctx,
141 uint8_t *msg, size_t msg_len,
144 struct unix_dgram_ctx **result)
146 struct unix_dgram_ctx *ctx;
/* Path length includes the terminating NUL. */
151 pathlen = strlen(addr->sun_path)+1;
/* Context and path are a single allocation (flexible trailing path). */
156 ctx = malloc(offsetof(struct unix_dgram_ctx, path) + pathlen);
161 memcpy(ctx->path, addr->sun_path, pathlen);
/* One receive buffer sized to the largest datagram we accept. */
166 ctx->recv_buf = malloc(max_msg);
167 if (ctx->recv_buf == NULL) {
171 ctx->max_msg = max_msg;
172 ctx->ev_funcs = ev_funcs;
173 ctx->recv_callback = recv_callback;
174 ctx->private_data = private_data;
175 ctx->sock_read_watch = NULL;
176 ctx->send_pool = NULL;
177 ctx->pool_read_watch = NULL;
178 ctx->send_queues = NULL;
/* -1 until bind succeeds; used by unix_dgram_free to decide unlink. */
179 ctx->created_pid = (pid_t)-1;
181 ctx->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
182 if (ctx->sock == -1) {
187 /* Set non-blocking and close-on-exec. */
188 ret = prepare_socket(ctx->sock);
194 ret = bind(ctx->sock,
195 (const struct sockaddr *)(const void *)addr,
/* Remember who created (and should later unlink) the socket file. */
202 ctx->created_pid = getpid();
/* Watch for readability; datagrams arrive via unix_dgram_recv_handler. */
204 ctx->sock_read_watch = ctx->ev_funcs->watch_new(
205 ctx->ev_funcs, ctx->sock, POLLIN,
206 unix_dgram_recv_handler, ctx);
208 if (ctx->sock_read_watch == NULL) {
/*
 * POLLIN handler for the receive socket: read one datagram into
 * ctx->recv_buf and hand it to the registered recv_callback.
 */
225 static void unix_dgram_recv_handler(struct poll_watch *w, int fd, short events,
228 struct unix_dgram_ctx *ctx = (struct unix_dgram_ctx *)private_data;
/* Single iovec covering the whole pre-allocated receive buffer. */
233 iov = (struct iovec) {
234 .iov_base = (void *)ctx->recv_buf,
235 .iov_len = ctx->max_msg,
238 msg = (struct msghdr) {
241 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
247 received = recvmsg(fd, &msg, 0);
248 if (received == -1) {
/* Transient conditions: leave the watch armed and retry later. */
249 if ((errno == EAGAIN) ||
251 (errno == EWOULDBLOCK) ||
253 (errno == EINTR) || (errno == ENOMEM)) {
254 /* Not really an error - just try again. */
257 /* Problem with the socket. Set it unreadable. */
258 ctx->ev_funcs->watch_update(w, 0);
/* Oversized datagrams cannot be ours; the sender fragments at max_msg. */
261 if (received > ctx->max_msg) {
262 /* More than we expected, not for us */
265 ctx->recv_callback(ctx, ctx->recv_buf, received, ctx->private_data);
/* Forward declaration: completion handler for pool send jobs (below). */
268 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
/*
 * Lazily create the send thread pool and register a watch on its signal
 * fd so finished send jobs are reaped via unix_dgram_job_finished.
 * Idempotent: returns early if the pool already exists.
 */
271 static int unix_dgram_init_pthreadpool(struct unix_dgram_ctx *ctx)
275 if (ctx->send_pool != NULL) {
/* 0 == let the pool size itself. */
279 ret = pthreadpool_init(0, &ctx->send_pool);
284 signalfd = pthreadpool_signal_fd(ctx->send_pool);
286 ctx->pool_read_watch = ctx->ev_funcs->watch_new(
287 ctx->ev_funcs, signalfd, POLLIN,
288 unix_dgram_job_finished, ctx);
289 if (ctx->pool_read_watch == NULL) {
/* Roll back pool creation so a later call can retry cleanly. */
290 pthreadpool_destroy(ctx->send_pool);
291 ctx->send_pool = NULL;
298 static int unix_dgram_send_queue_init(
299 struct unix_dgram_ctx *ctx, const char *path,
300 struct unix_dgram_send_queue **result)
302 struct unix_dgram_send_queue *q;
303 struct sockaddr_un addr = { 0, };
307 pathlen = strlen(path)+1;
309 if (pathlen > sizeof(addr.sun_path)) {
313 q = malloc(offsetof(struct unix_dgram_send_queue, path) + pathlen);
319 memcpy(q->path, path, pathlen);
321 q->sock = socket(AF_UNIX, SOCK_DGRAM, 0);
327 err = prepare_socket_cloexec(q->sock);
332 addr.sun_family = AF_UNIX;
333 memcpy(addr.sun_path, path, pathlen+1);
336 ret = connect(q->sock, (struct sockaddr *)&addr, sizeof(addr));
337 } while ((ret == -1) && (errno == EINTR));
344 err = unix_dgram_init_pthreadpool(ctx);
349 DLIST_ADD(ctx->send_queues, q);
/*
 * Destroy a send queue: free every pending message, then unlink the queue
 * from its owning context (socket close and queue free are elided here).
 */
361 static void unix_dgram_send_queue_free(struct unix_dgram_send_queue *q)
363 struct unix_dgram_ctx *ctx = q->ctx;
/* Drain and free all still-queued messages. */
365 while (q->msgs != NULL) {
366 struct unix_dgram_msg *msg;
368 DLIST_REMOVE(q->msgs, msg);
372 DLIST_REMOVE(ctx->send_queues, q);
376 static struct unix_dgram_send_queue *find_send_queue(
377 struct unix_dgram_ctx *ctx, const char *dst_sock)
379 struct unix_dgram_send_queue *s;
381 for (s = ctx->send_queues; s != NULL; s = s->next) {
382 if (strcmp(s->path, dst_sock) == 0) {
/*
 * Flatten the caller's iov array into one contiguous unix_dgram_msg and
 * append it to queue "q". Returns 0 or an errno value.
 */
389 static int queue_msg(struct unix_dgram_send_queue *q,
390 const struct iovec *iov, int iovlen)
392 struct unix_dgram_msg *msg;
397 buflen = iov_buflen(iov, iovlen);
/* Total allocation: header plus flattened payload. */
402 msglen = offsetof(struct unix_dgram_msg, buf) + buflen;
/* Guard against size_t overflow in the addition above. */
403 if ((msglen < buflen) ||
404 (msglen < offsetof(struct unix_dgram_msg, buf))) {
409 msg = malloc(msglen);
413 msg->buflen = buflen;
/*
 * NOTE(review): buflen is apparently reset to 0 before this copy loop
 * (the reset line is elided from this view) so it serves as the running
 * write offset into msg->buf — confirm against the full file.
 */
417 for (i=0; i<iovlen; i++) {
418 memcpy(&msg->buf[buflen], iov[i].iov_base, iov[i].iov_len);
419 buflen += iov[i].iov_len;
/* Append at the tail to preserve send ordering. */
422 DLIST_ADD_END(q->msgs, msg, struct unix_dgram_msg);
426 static void unix_dgram_send_job(void *private_data)
428 struct unix_dgram_msg *msg = private_data;
431 msg->sent = send(msg->sock, msg->buf, msg->buflen, 0);
432 } while ((msg->sent == -1) && (errno == EINTR));
/*
 * Fires when the thread pool signals a completed send job: find the queue
 * the job belonged to (jobs are keyed by the queue's socket fd), drop the
 * finished head message, and either kick off the next queued message or
 * free the now-empty queue.
 */
435 static void unix_dgram_job_finished(struct poll_watch *w, int fd, short events,
438 struct unix_dgram_ctx *ctx = private_data;
439 struct unix_dgram_send_queue *q;
440 struct unix_dgram_msg *msg;
/* Reap exactly one finished job id from the pool. */
443 ret = pthreadpool_finished_jobs(ctx->send_pool, &job, 1);
/* The job id is the queue's socket fd; find the owning queue. */
448 for (q = ctx->send_queues; q != NULL; q = q->next) {
449 if (job == q->sock) {
455 /* Huh? Should not happen */
/* Drop the message that just finished sending. */
460 DLIST_REMOVE(q->msgs, msg);
/* More pending? Schedule the next head message on the pool. */
463 if (q->msgs != NULL) {
464 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
465 unix_dgram_send_job, q->msgs);
/* Queue drained: tear it down. */
471 unix_dgram_send_queue_free(q);
/*
 * Send one datagram to "dst_sock". Fast path: a nonblocking sendmsg on the
 * shared socket. If the destination already has a queue (to preserve
 * ordering) or the nonblocking send would block, the message is queued and
 * pushed through the thread pool instead. Returns 0 or an errno value.
 */
474 static int unix_dgram_send(struct unix_dgram_ctx *ctx, const char *dst_sock,
475 const struct iovec *iov, int iovlen)
477 struct unix_dgram_send_queue *q;
478 struct sockaddr_un addr = { 0, };
483 dst_len = strlen(dst_sock);
484 if (dst_len >= sizeof(addr.sun_path)) {
489 * To preserve message ordering, we have to queue a message when
490 * others are waiting in line already.
492 q = find_send_queue(ctx, dst_sock);
494 return queue_msg(q, iov, iovlen);
498 * Try a cheap nonblocking send
501 addr.sun_family = AF_UNIX;
502 memcpy(addr.sun_path, dst_sock, dst_len);
504 msg.msg_name = &addr;
505 msg.msg_namelen = sizeof(addr);
/* sendmsg wants non-const iov; cast away const without copying. */
506 msg.msg_iov = discard_const_p(struct iovec, iov);
507 msg.msg_iovlen = iovlen;
508 #ifdef HAVE_STRUCT_MSGHDR_MSG_CONTROL
509 msg.msg_control = NULL;
510 msg.msg_controllen = 0;
514 ret = sendmsg(ctx->sock, &msg, 0);
/* Hard failure: anything other than would-block/interrupted is fatal.
 * (The two variants below are alternate #if branches for platforms
 * with/without EWOULDBLOCK.) */
519 if ((errno != EWOULDBLOCK) && (errno != EAGAIN) && (errno != EINTR)) {
521 if ((errno != EAGAIN) && (errno != EINTR)) {
/* Would block: fall back to a per-destination queue + thread pool. */
526 ret = unix_dgram_send_queue_init(ctx, dst_sock, &q);
530 ret = queue_msg(q, iov, iovlen);
532 unix_dgram_send_queue_free(q);
/* Kick off the first (head) message as a pool job. */
535 ret = pthreadpool_add_job(ctx->send_pool, q->sock,
536 unix_dgram_send_job, q->msgs);
538 unix_dgram_send_queue_free(q);
/* Accessor for the context's datagram socket fd (body elided from view);
 * used by unix_msg_send to stamp fragment headers for sender uniqueness. */
544 static int unix_dgram_sock(struct unix_dgram_ctx *ctx)
/*
 * Tear down a datagram context. Refuses while send queues are still
 * pending; destroys the thread pool and its watch, frees the socket watch,
 * and unlinks the socket file only if this process created it.
 * Returns 0 or an errno value.
 */
549 static int unix_dgram_free(struct unix_dgram_ctx *ctx)
/* Outstanding sends would be lost; caller must drain first. */
551 if (ctx->send_queues != NULL) {
555 if (ctx->send_pool != NULL) {
556 int ret = pthreadpool_destroy(ctx->send_pool);
560 ctx->ev_funcs->watch_free(ctx->pool_read_watch);
563 ctx->ev_funcs->watch_free(ctx->sock_read_watch);
/* Only the creating process may unlink the socket path. */
565 if (getpid() == ctx->created_pid) {
566 /* If we created it, unlink. Otherwise someone else might
567 * still have it open */
578 * Every message starts with a uint64_t cookie.
580 * A value of 0 indicates a single-fragment message which is complete in
581 * itself. The data immediately follows the cookie.
583 * Every multi-fragment message has a cookie != 0 and starts with a cookie
584 * followed by a struct unix_msg_hdr and then the data. The pid and sock
585 * fields are used to assure uniqueness on the receiver side.
/*
 * Wire header of a multi-fragment message: follows the cookie in each
 * fragment (fields — msglen, pid, sock — are elided from this view;
 * pid+sock identify the sender for reassembly, see unix_msg_recv).
 */
588 struct unix_msg_hdr {
/* In-progress reassembly entry, linked into unix_msg_ctx->msgs. */
595 struct unix_msg *prev, *next;
/*
 * Context for the fragmenting "unix_msg" layer, built on top of a
 * unix_dgram_ctx. Holds the fragment size, the per-sender cookie, the
 * user callback, and the list of partially reassembled messages.
 */
604 struct unix_msg_ctx {
/* Underlying datagram transport. */
605 struct unix_dgram_ctx *dgram;
/* Delivered one fully reassembled message at a time. */
609 void (*recv_callback)(struct unix_msg_ctx *ctx,
610 uint8_t *msg, size_t msg_len,
/* Messages currently being reassembled (keyed by sender pid+sock). */
614 struct unix_msg *msgs;
/* Forward declaration: dgram-layer receive callback (defined below). */
617 static void unix_msg_recv(struct unix_dgram_ctx *ctx,
618 uint8_t *msg, size_t msg_len,
/*
 * Public constructor: create a fragmenting message context bound to
 * "path" (or anonymous when path is NULL — paddr stays NULL then).
 * fragment_len caps each datagram; cookie must be unique per sender.
 * Returns 0 or an errno value; *result holds the new context.
 */
621 int unix_msg_init(const char *path, const struct poll_funcs *ev_funcs,
622 size_t fragment_len, uint64_t cookie,
623 void (*recv_callback)(struct unix_msg_ctx *ctx,
624 uint8_t *msg, size_t msg_len,
627 struct unix_msg_ctx **result)
629 struct unix_msg_ctx *ctx;
630 struct sockaddr_un addr;
631 struct sockaddr_un *paddr = NULL;
634 ctx = malloc(sizeof(*ctx));
/* Path length includes the terminating NUL. */
640 size_t pathlen = strlen(path)+1;
642 if (pathlen > sizeof(addr.sun_path)) {
645 addr = (struct sockaddr_un) { .sun_family = AF_UNIX };
646 memcpy(addr.sun_path, path, pathlen);
/* The dgram layer receives raw fragments; unix_msg_recv reassembles. */
650 ret = unix_dgram_init(paddr, fragment_len, ev_funcs,
651 unix_msg_recv, ctx, &ctx->dgram);
657 ctx->fragment_len = fragment_len;
658 ctx->cookie = cookie;
659 ctx->recv_callback = recv_callback;
660 ctx->private_data = private_data;
/*
 * Public send: if the message fits in one fragment (after the 8-byte
 * cookie) send it as a single datagram with cookie 0; otherwise split it
 * into cookie+hdr-prefixed fragments of at most ctx->fragment_len bytes.
 * Returns 0 or an errno value.
 */
667 int unix_msg_send(struct unix_msg_ctx *ctx, const char *dst_sock,
668 const struct iovec *iov, int iovlen)
673 struct iovec *iov_copy;
674 struct unix_msg_hdr hdr;
675 struct iovec src_iov;
681 msglen = iov_buflen(iov, iovlen);
/* Single-fragment fast path: payload plus cookie fits in one datagram.
 * NOTE(review): assumes fragment_len >= sizeof(uint64_t), otherwise the
 * subtraction wraps — presumably guaranteed by init; confirm. */
686 if (msglen <= (ctx->fragment_len - sizeof(uint64_t))) {
/* VLA: cookie slot plus the caller's iovecs. */
687 struct iovec tmp_iov[iovlen+1];
/* Cookie 0 marks a complete single-fragment message. */
690 tmp_iov[0].iov_base = &cookie;
691 tmp_iov[0].iov_len = sizeof(cookie);
693 memcpy(&tmp_iov[1], iov,
694 sizeof(struct iovec) * iovlen);
697 return unix_dgram_send(ctx->dgram, dst_sock, tmp_iov,
/* Multi-fragment path: pid+sock in hdr make the stream unique. */
703 hdr.sock = unix_dgram_sock(ctx->dgram);
/* Two extra slots: [0]=cookie, [1]=hdr, rest = payload chunks. */
705 iov_copy = malloc(sizeof(struct iovec) * (iovlen + 2));
706 if (iov_copy == NULL) {
709 iov_copy[0].iov_base = &ctx->cookie;
710 iov_copy[0].iov_len = sizeof(ctx->cookie);
711 iov_copy[1].iov_base = &hdr;
712 iov_copy[1].iov_len = sizeof(hdr);
718 * The following write loop sends the user message in pieces. We have
719 * filled the first two iovecs above with "cookie" and "hdr". In the
720 * following loops we pull message chunks from the user iov array and
721 * fill iov_copy piece by piece, possibly truncating chunks from the
722 * caller's iov array. Ugly, but hopefully efficient.
/* Outer loop: one iteration per fragment sent. */
725 while (sent < msglen) {
727 size_t iov_index = 2;
/* Running size of this fragment, starting with the fixed prefix. */
729 fragment_len = sizeof(ctx->cookie) + sizeof(hdr);
/* Inner loop: fill the fragment from the caller's iov chunks. */
731 while (fragment_len < ctx->fragment_len) {
734 space = ctx->fragment_len - fragment_len;
735 chunk = MIN(space, src_iov.iov_len);
737 iov_copy[iov_index].iov_base = src_iov.iov_base;
738 iov_copy[iov_index].iov_len = chunk;
/* Advance the cursor into the current source chunk. */
741 src_iov.iov_base = (char *)src_iov.iov_base + chunk;
742 src_iov.iov_len -= chunk;
743 fragment_len += chunk;
/* Current source iovec exhausted; move to the next one. */
745 if (src_iov.iov_len == 0) {
/* Payload bytes in this fragment = total minus the fixed prefix. */
754 sent += (fragment_len - sizeof(ctx->cookie) - sizeof(hdr));
756 ret = unix_dgram_send(ctx->dgram, dst_sock,
757 iov_copy, iov_index);
/* Cookie 0 is reserved for single-fragment messages; skip it. */
766 if (ctx->cookie == 0) {
/*
 * Dgram-layer callback: reassemble fragments into complete messages.
 * Cookie 0 => complete single-fragment message, delivered immediately.
 * Otherwise the fragment carries a unix_msg_hdr; fragments are matched to
 * an in-progress unix_msg by sender pid+sock, and a cookie change from
 * the same sender discards the stale partial message.
 */
773 static void unix_msg_recv(struct unix_dgram_ctx *dgram_ctx,
774 uint8_t *buf, size_t buflen,
777 struct unix_msg_ctx *ctx = (struct unix_msg_ctx *)private_data;
778 struct unix_msg_hdr hdr;
779 struct unix_msg *msg;
/* Runt datagram: cannot even hold the cookie; ignore. */
783 if (buflen < sizeof(cookie)) {
786 memcpy(&cookie, buf, sizeof(cookie));
/* Strip the cookie from the payload view. */
788 buf += sizeof(cookie);
789 buflen -= sizeof(cookie);
/* Cookie 0: complete message in a single fragment. */
792 ctx->recv_callback(ctx, buf, buflen, ctx->private_data);
/* Multi-fragment: must carry a full header. */
796 if (buflen < sizeof(hdr)) {
799 memcpy(&hdr, buf, sizeof(hdr));
802 buflen -= sizeof(hdr);
/* Find the in-progress message from this sender (pid + socket fd). */
804 for (msg = ctx->msgs; msg != NULL; msg = msg->next) {
805 if ((msg->sender_pid == hdr.pid) &&
806 (msg->sender_sock == hdr.sock)) {
/* Same sender, new cookie: the old partial message is stale. */
811 if ((msg != NULL) && (msg->cookie != cookie)) {
812 DLIST_REMOVE(ctx->msgs, msg);
/* First fragment of a new message: allocate the reassembly buffer. */
818 msg = malloc(offsetof(struct unix_msg, buf) + hdr.msglen);
822 msg->msglen = hdr.msglen;
824 msg->sender_pid = hdr.pid;
825 msg->sender_sock = hdr.sock;
826 msg->cookie = cookie;
827 DLIST_ADD(ctx->msgs, msg);
/* Reject fragments that would overflow the announced total length. */
830 space = msg->msglen - msg->received;
831 if (buflen > space) {
835 memcpy(msg->buf + msg->received, buf, buflen);
836 msg->received += buflen;
/* Not complete yet; wait for more fragments. */
838 if (msg->received < msg->msglen) {
/* Complete: unlink and deliver the reassembled message. */
842 DLIST_REMOVE(ctx->msgs, msg);
843 ctx->recv_callback(ctx, msg->buf, msg->msglen, ctx->private_data);
/*
 * Public destructor: tear down the underlying dgram context first (which
 * can fail, e.g. with pending sends), then discard any partially
 * reassembled messages. Returns 0 or an errno value.
 */
847 int unix_msg_free(struct unix_msg_ctx *ctx)
851 ret = unix_dgram_free(ctx->dgram);
/* Drop all incomplete reassembly buffers. */
856 while (ctx->msgs != NULL) {
857 struct unix_msg *msg = ctx->msgs;
858 DLIST_REMOVE(ctx->msgs, msg);
/*
 * Sum the lengths of an iovec array with overflow detection (the overflow
 * branch's return is past the end of this view; presumably -1).
 */
866 static ssize_t iov_buflen(const struct iovec *iov, int iovlen)
871 for (i=0; i<iovlen; i++) {
872 size_t thislen = iov[i].iov_len;
873 size_t tmp = buflen + thislen;
/* size_t wrap-around check: a sum smaller than either addend overflowed. */
875 if ((tmp < buflen) || (tmp < thislen)) {