2 Unix SMB/CIFS implementation.
3 Infrastructure for SMB-Direct RDMA as transport
4 Copyright (C) Stefan Metzmacher 2012,2016
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/network.h"
23 #include "lib/util/tevent_ntstatus.h"
24 #include "lib/tsocket/tsocket.h"
25 #include "lib/util/util_net.h"
26 #include "libcli/smb/smb_common.h"
27 #include "libcli/smb/smb_direct.h"
28 #include "lib/util/dlinklist.h"
29 #include "lib/util/iov_buf.h"
31 #ifdef SMB_TRANSPORT_ENABLE_RDMA
32 #include <rdma/rdma_cma_abi.h>
33 #include <rdma/rdma_cma.h>
34 #include <infiniband/verbs.h>
/*
 * State of one SMB-Direct connection.
 *
 * NOTE(review): the nested struct openers/closers are not visible in this
 * view; the group names used in the comments below are taken from how the
 * members are accessed further down in the file (c->state.*, c->sock.*,
 * c->rdma.*, c->ibv.*, c->r2s.*, c->s2r.*).
 */
38 struct smb_direct_connection {
39 struct tevent_context *last_ev;
/*
 * c->state: size limits and credit counters. Defaults are set in
 * smb_direct_connection_create() and adjusted from the negotiate response.
 */
41 uint32_t max_send_size;
42 uint32_t max_receive_size;
43 uint32_t max_fragmented_size;
44 uint32_t max_read_write_size;
45 uint16_t send_credit_target;
46 uint16_t send_credits;
47 uint16_t receive_credit_max;
48 uint16_t receive_credit_target;
49 uint16_t receive_credits;
50 uint32_t keep_alive_internal;
/* c->sock: socketpair() endpoint(s) and the tevent fd event on them. */
53 int tmp_fd; /* given to the caller end */
55 struct tevent_fd *fde;
/*
 * c->rdma: connection-manager id, its event channel, the event type the
 * connect state machine waits for next, and the event being processed.
 */
58 struct rdma_cm_id *cm_id;
59 struct rdma_event_channel *cm_channel;
60 struct tevent_fd *fde_channel;
61 enum rdma_cm_event_type expected_event;
62 struct rdma_cm_event *cm_event;
/* c->ibv: completion channel, completion queues and QP creation attributes. */
66 struct ibv_comp_channel *comp_channel;
67 struct tevent_fd *fde_channel;
68 struct ibv_cq *send_cq;
69 struct ibv_cq *recv_cq;
71 struct ibv_qp_init_attr init_attr;
/* talloc parent of every smb_direct_io allocated by smb_direct_io_create(). */
74 TALLOC_CTX *io_mem_ctx;
/* c->r2s: receive-side io lists (idle -> posted via post_recv). */
77 * here we have io coming into
78 * the rdma layer, which needs to
79 * be flushed to the socketpair
81 struct smb_direct_io *idle;
82 struct smb_direct_io *posted;
83 struct smb_direct_io *ready;
84 struct smb_direct_io *out;
85 uint32_t remaining_length;
/* c->s2r: send-side io lists (ready -> posted via post_send). */
89 * here we have io coming from the socketpair
90 * which needs to be flushed into the rdma layer.
92 struct smb_direct_io *idle;
93 struct smb_direct_io *posted;
94 struct smb_direct_io *ready;
95 struct smb_direct_io *in;
96 uint32_t remaining_length;
/* Size in bytes of the data buffer embedded in every smb_direct_io. */
100 #define SMB_DIRECT_IO_MAX_DATA 8192
/*
 * One I/O unit: header and data buffers, their memory registrations and
 * the send/receive work requests that reference them.
 */
102 struct smb_direct_io {
/* List linkage used by the DLIST_* macros. */
103 struct smb_direct_io *prev, *next;
/* Registrations of smbd_hdr and data, created in smb_direct_io_create(). */
105 struct ibv_mr *hdr_mr;
106 struct ibv_mr *data_mr;
/* sge[0] is set up to describe smbd_hdr, sge[1] to describe data. */
107 struct ibv_sge sge[2];
/* Both work requests point at the same sge array. */
109 struct ibv_recv_wr recv_wr;
110 struct ibv_send_wr send_wr;
112 struct iovec _iov_array[2];
/* data_length / remaining_length are written into the outgoing header. */
116 uint32_t data_length;
117 uint32_t remaining_length;
119 uint8_t nbt_hdr[0x04];
120 uint8_t smbd_hdr[0x18];
121 uint8_t data[SMB_DIRECT_IO_MAX_DATA];
/* Forward declaration: installed as talloc destructor by the constructor below. */
124 static int smb_direct_io_destructor(struct smb_direct_io *io);
/*
 * Allocate one smb_direct_io as a talloc child of c->io_mem_ctx, register
 * its buffers with the connection's c->ibv.pd and pre-fill the
 * scatter/gather entries and the send/receive work requests.
 *
 * NOTE(review): the failure branches and return statements are not visible
 * in this view; callers test the result with tevent_req_nomem(), so it
 * presumably returns NULL on failure - confirm.
 */
126 static struct smb_direct_io *smb_direct_io_create(struct smb_direct_connection *c)
128 struct smb_direct_io *io;
130 if (c->io_mem_ctx == NULL) {
134 io = talloc_zero(c->io_mem_ctx, struct smb_direct_io);
/* From here on talloc_free(io) also deregisters the memory regions. */
138 talloc_set_destructor(io, smb_direct_io_destructor);
/* Register the header buffer for local write access. */
140 io->hdr_mr = ibv_reg_mr(c->ibv.pd,
142 sizeof(io->smbd_hdr),
143 IBV_ACCESS_LOCAL_WRITE);
144 if (io->hdr_mr == NULL) {
/* Second registration, stored as data_mr (arguments partly not visible). */
149 io->data_mr = ibv_reg_mr(c->ibv.pd,
152 IBV_ACCESS_LOCAL_WRITE);
153 if (io->data_mr == NULL) {
/* sge[0] covers the whole header buffer, sge[1] the whole data buffer. */
158 io->sge[0].addr = (uint64_t) (uintptr_t) io->smbd_hdr;
159 io->sge[0].length = sizeof(io->smbd_hdr);
160 io->sge[0].lkey = io->hdr_mr->lkey;
161 io->sge[1].addr = (uint64_t) (uintptr_t) io->data;
162 io->sge[1].length = sizeof(io->data);
163 io->sge[1].lkey = io->data_mr->lkey;
/* wr_id carries the io pointer so completions can be mapped back to it. */
165 io->send_wr.wr_id = (uint64_t) (uintptr_t) io;
166 io->send_wr.opcode = IBV_WR_SEND;
167 io->send_wr.send_flags = IBV_SEND_SIGNALED;
168 io->send_wr.sg_list = io->sge;
169 io->send_wr.num_sge = ARRAY_SIZE(io->sge);
/* The receive request shares the same sge array as the send request. */
171 io->recv_wr.wr_id = (uint64_t) (uintptr_t) io;
172 io->recv_wr.sg_list = io->sge;
173 io->recv_wr.num_sge = ARRAY_SIZE(io->sge);
/*
 * talloc destructor of smb_direct_io: deregisters the header and data
 * memory regions if they were registered.
 */
178 static int smb_direct_io_destructor(struct smb_direct_io *io)
180 if (io->hdr_mr != NULL) {
181 ibv_dereg_mr(io->hdr_mr);
185 if (io->data_mr != NULL) {
186 ibv_dereg_mr(io->data_mr);
/* Forward declaration: installed as talloc destructor by the constructor below. */
193 static int smb_direct_connection_destructor(struct smb_direct_connection *c);
/*
 * Allocate a smb_direct_connection on mem_ctx and prepare everything that
 * does not need a peer yet: default limits, a socketpair, the RDMA CM
 * event channel and id, the ibverbs pd, completion channel, send and
 * receive completion queues, the queue pair and the pools of idle
 * smb_direct_io buffers.
 *
 * NOTE(review): the error-path bodies and the return statements are not
 * visible in this view.
 */
195 struct smb_direct_connection *smb_direct_connection_create(TALLOC_CTX *mem_ctx)
197 struct smb_direct_connection *c;
202 c = talloc_zero(mem_ctx, struct smb_direct_connection);
/* From here on freeing c releases the resources acquired below. */
209 talloc_set_destructor(c, smb_direct_connection_destructor);
/* Defaults; several of them are sent in the negotiate request. */
211 c->state.max_send_size = 1364;
212 c->state.max_receive_size = SMB_DIRECT_IO_MAX_DATA;
213 c->state.max_fragmented_size = 1048576;
214 c->state.max_read_write_size = 0;
215 c->state.receive_credit_max = 16;
216 c->state.send_credit_target = 255;
217 c->state.keep_alive_internal = 5;
/*
 * socketpair(domain, type, protocol, sv): the socket type is SOCK_STREAM
 * and the protocol is 0 (the default for that type). The two arguments
 * were previously passed in swapped positions.
 */
219 ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sfd);
221 int saved_errno = errno;
226 c->sock.tmp_fd = sfd[0];
/* Both ends are close-on-exec; only c->sock.fd is made non-blocking here. */
229 smb_set_close_on_exec(c->sock.tmp_fd);
230 smb_set_close_on_exec(c->sock.fd);
231 set_blocking(c->sock.fd, false);
/* Connection-manager event channel, polled non-blocking. */
233 c->rdma.cm_channel = rdma_create_event_channel();
234 if (c->rdma.cm_channel == NULL) {
238 smb_set_close_on_exec(c->rdma.cm_channel->fd);
239 set_blocking(c->rdma.cm_channel->fd, false);
/* Two call forms of rdma_create_id(), selected by the CM ABI version. */
241 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
242 ret = rdma_create_id(c->rdma.cm_channel,
247 ret = rdma_create_id(c->rdma.cm_channel,
256 c->ibv.pd = ibv_alloc_pd(c->rdma.cm_id->verbs);
257 if (c->ibv.pd == NULL) {
/* One completion channel serves both completion queues. */
262 c->ibv.comp_channel = ibv_create_comp_channel(c->rdma.cm_id->verbs);
263 if (c->ibv.comp_channel == NULL) {
267 smb_set_close_on_exec(c->ibv.comp_channel->fd);
268 set_blocking(c->ibv.comp_channel->fd, false);
270 c->ibv.init_attr.cap.max_send_wr = 2;
271 c->ibv.init_attr.cap.max_recv_wr = 2;
272 c->ibv.init_attr.cap.max_recv_sge = 2;
273 c->ibv.init_attr.cap.max_send_sge = 2;
274 c->ibv.init_attr.qp_type = IBV_QPT_RC;
275 c->ibv.init_attr.sq_sig_all = 1;
/* The connection pointer is the cq context; the handlers check it. */
277 c->ibv.send_cq = ibv_create_cq(c->rdma.cm_id->verbs,
278 c->ibv.init_attr.cap.max_send_wr,
279 c, c->ibv.comp_channel, 0);
280 if (c->ibv.send_cq == NULL) {
284 c->ibv.init_attr.send_cq = c->ibv.send_cq;
286 c->ibv.recv_cq = ibv_create_cq(c->rdma.cm_id->verbs,
287 c->ibv.init_attr.cap.max_recv_wr,
288 c, c->ibv.comp_channel, 0);
289 if (c->ibv.recv_cq == NULL) {
293 c->ibv.init_attr.recv_cq = c->ibv.recv_cq;
/* Request a completion-channel notification for both queues. */
295 ret = ibv_req_notify_cq(c->ibv.send_cq, 0);
301 ret = ibv_req_notify_cq(c->ibv.recv_cq, 0);
307 ret = rdma_create_qp(c->rdma.cm_id, c->ibv.pd, &c->ibv.init_attr);
312 c->ibv.qp = c->rdma.cm_id->qp;
/* Parent context for all io buffers, so they can be freed in one go. */
314 c->io_mem_ctx = talloc_named_const(c, 0, "io_mem_ctx");
315 if (c->io_mem_ctx == NULL) {
/* Pre-allocate receive_credit_max receive buffers ... */
320 for (i = 0; i < c->state.receive_credit_max; i++) {
321 struct smb_direct_io *io;
323 io = smb_direct_io_create(c);
324 DLIST_ADD_END(c->r2s.idle, io);
/* ... and send_credit_target send buffers. */
327 for (i = 0; i < c->state.send_credit_target; i++) {
328 struct smb_direct_io *io;
330 io = smb_direct_io_create(c);
331 DLIST_ADD_END(c->s2r.idle, io);
/*
 * talloc destructor of smb_direct_connection. Releases, in this order:
 * the socket fd event and fds, the channel fd events, all io buffers
 * (via io_mem_ctx), a pending CM event, the queue pair, both completion
 * queues, the completion channel, the pd, the CM id and the CM event
 * channel.
 */
337 static int smb_direct_connection_destructor(struct smb_direct_connection *c)
339 TALLOC_FREE(c->sock.fde);
341 if (c->sock.fd != -1) {
346 if (c->sock.tmp_fd != -1) {
347 close(c->sock.tmp_fd);
351 TALLOC_FREE(c->ibv.fde_channel);
352 TALLOC_FREE(c->rdma.fde_channel);
/* Frees every smb_direct_io; their destructors deregister the MRs. */
354 TALLOC_FREE(c->io_mem_ctx);
/* An event fetched but not yet acknowledged must be acked. */
358 if (c->rdma.cm_event != NULL) {
359 rdma_ack_cm_event(c->rdma.cm_event);
360 c->rdma.cm_event = NULL;
363 if (c->ibv.qp != NULL) {
364 ibv_destroy_qp(c->ibv.qp);
368 if (c->ibv.send_cq != NULL) {
369 ibv_destroy_cq(c->ibv.send_cq);
370 c->ibv.send_cq = NULL;
373 if (c->ibv.recv_cq != NULL) {
374 ibv_destroy_cq(c->ibv.recv_cq);
375 c->ibv.recv_cq = NULL;
378 if (c->ibv.comp_channel != NULL) {
379 ibv_destroy_comp_channel(c->ibv.comp_channel);
380 c->ibv.comp_channel = NULL;
383 if (c->ibv.pd != NULL) {
384 ibv_dealloc_pd(c->ibv.pd);
388 if (c->rdma.cm_id != NULL) {
389 rdma_destroy_id(c->rdma.cm_id);
390 c->rdma.cm_id = NULL;
393 if (c->rdma.cm_channel != NULL) {
394 rdma_destroy_event_channel(c->rdma.cm_channel);
395 c->rdma.cm_channel = NULL;
/*
 * Post every io on c->r2s.idle as one chained receive request and move
 * the whole idle list to c->r2s.posted.
 *
 * NOTE(review): the return statements are not visible in this view.
 */
401 static int smb_direct_connection_post_recv(struct smb_direct_connection *c)
403 struct smb_direct_io *io = NULL;
404 struct ibv_recv_wr *bad_recv_wr = NULL;
/* Nothing to post when there is no idle receive buffer. */
407 if (c->r2s.idle == NULL) {
/* Link the work requests in list order; the last one ends the chain. */
411 for (io = c->r2s.idle; io != NULL; io = io->next) {
412 if (io->next == NULL) {
413 io->recv_wr.next = NULL;
417 io->recv_wr.next = &io->next->recv_wr;
/* A single call posts the whole chain starting at the list head. */
421 ret = ibv_post_recv(c->ibv.qp, &c->r2s.idle->recv_wr, &bad_recv_wr);
424 status = map_nt_error_from_unix_common(errno);
425 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
426 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
430 DLIST_CONCATENATE(c->r2s.posted, c->r2s.idle);
/*
 * Take ios from c->s2r.ready while send credits are available, fill in
 * their data-transfer header, chain their send requests, post the chain
 * with one ibv_post_send() call and append them to c->s2r.posted.
 *
 * NOTE(review): the return statements and some short lines (e.g. the
 * declarations of ret/status/flags and the assignment of next) are not
 * visible in this view.
 */
436 static int smb_direct_connection_post_send(struct smb_direct_connection *c)
438 struct smb_direct_io *io = NULL;
439 struct smb_direct_io *next = NULL;
440 struct smb_direct_io *posted = NULL;
441 struct smb_direct_io *last = NULL;
442 struct ibv_send_wr *bad_send_wr = NULL;
/* Nothing queued for sending. */
445 if (c->s2r.ready == NULL) {
/* No credit, nothing may be sent right now. */
449 if (c->state.send_credits == 0) {
457 for (io = c->s2r.ready; io != NULL; io = next) {
458 uint16_t granted = 0;
460 uint32_t data_offset = 0;
/* Stop as soon as the credits are used up. */
464 if (c->state.send_credits == 0) {
/* Each message sent consumes one send credit. */
468 c->state.send_credits -= 1;
470 if (c->state.send_credits == 0) {
/*
 * Grant the peer the receive credits still available, capped at the
 * target it asked for.
 */
474 granted = c->state.receive_credit_max;
475 granted -= c->state.receive_credits;
476 granted = MIN(granted, c->state.receive_credit_target);
477 c->state.receive_credits += granted;
/* With payload: header of data_offset bytes plus the data segment. */
479 if (io->data_length > 0) {
481 io->sge[0].length = data_offset;
482 io->sge[1].length = io->data_length;
483 io->send_wr.num_sge = 2;
/* Without payload only the first 0x14 header bytes are sent. */
485 io->sge[0].length = 0x14;
486 io->send_wr.num_sge = 1;
/* Header layout as written here, all fields little endian. */
489 SSVAL(io->smbd_hdr, 0x00, c->state.send_credit_target);
490 SSVAL(io->smbd_hdr, 0x02, granted);
491 SSVAL(io->smbd_hdr, 0x04, flags);
492 SSVAL(io->smbd_hdr, 0x06, 0x0000);
493 SIVAL(io->smbd_hdr, 0x08, io->remaining_length);
494 SIVAL(io->smbd_hdr, 0x0C, data_offset);
495 SIVAL(io->smbd_hdr, 0x10, io->data_length);
496 SIVAL(io->smbd_hdr, 0x14, 0x00000000);
/* Chain to the following request, if any. */
499 io->send_wr.next = &next->send_wr;
501 io->send_wr.next = NULL;
/* Move the prepared io from the ready list to the local posted list. */
503 DLIST_REMOVE(c->s2r.ready, io);
504 DLIST_ADD_END(posted, io);
/* Terminate the chain at the last io that was actually prepared. */
507 last = DLIST_TAIL(posted);
508 last->send_wr.next = NULL;
/*
 * Post the chain that was just built. The prepared requests live on the
 * local 'posted' list; c->s2r.ready is NULL or holds only unprepared
 * entries at this point, so it must not be used as the chain head.
 */
511 ret = ibv_post_send(c->ibv.qp, &posted->send_wr, &bad_send_wr);
514 // DLIST_CONCATENATE(c->s2r.ready, posted); // TODO: check bad_send_wr
515 status = map_nt_error_from_unix_common(errno);
516 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
517 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
521 DLIST_CONCATENATE(c->s2r.posted, posted);
/*
 * Enable the readable flag on the socket fd event (c->sock.fde).
 * NOTE(review): the return statement is not visible in this view.
 */
526 static int smb_direct_connection_setup_readv(struct smb_direct_connection *c)
528 TEVENT_FD_READABLE(c->sock.fde);
529 // TODO: immediate_event?? may skips a syscall.
/*
 * Enable the writeable flag on the socket fd event (c->sock.fde).
 * NOTE(review): the return statement is not visible in this view.
 */
533 static int smb_direct_connection_setup_writev(struct smb_direct_connection *c)
535 TEVENT_FD_WRITEABLE(c->sock.fde);
536 // TODO: immediate_event?? may skips a syscall.
/* Request state of the RDMA connect step: only the connection it works on. */
540 struct smb_direct_connection_rdma_connect_state {
541 struct smb_direct_connection *c;
/*
 * Destructor of the RDMA connect request state: removes the CM channel fd
 * event so the connect handler cannot fire after the request is gone.
 */
544 static int smb_direct_connection_rdma_connect_state_destructor(
545 struct smb_direct_connection_rdma_connect_state *state)
547 struct smb_direct_connection *c = state->c;
549 TALLOC_FREE(c->rdma.fde_channel);
/* Forward declaration of the CM event handler registered below. */
554 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
555 struct tevent_fd *fde,
/*
 * Start the asynchronous RDMA connect: watch the CM event channel and
 * kick off address resolution towards dst. The remaining steps are
 * driven by smb_direct_connection_rdma_connect_handler().
 *
 * NOTE(review): src is effectively unused at the moment (src_addr is
 * NULL, the cast from src is commented out); local_addr and remote_addr
 * are not referenced in the visible lines.
 */
559 static struct tevent_req *smb_direct_connection_rdma_connect_send(TALLOC_CTX *mem_ctx,
560 struct tevent_context *ev,
561 struct smb_direct_connection *c,
562 const struct sockaddr_storage *src,
563 const struct sockaddr_storage *dst,
564 struct tsocket_address *local_addr,
565 struct tsocket_address *remote_addr)
567 struct tevent_req *req;
568 struct smb_direct_connection_rdma_connect_state *state;
570 //struct sockaddr *src_addr = (const struct sockaddr *)src;
571 struct sockaddr *src_addr = NULL;
/* Work on a copy so the port can be changed without touching *dst. */
572 struct sockaddr_storage _dst_addr = *dst;
573 struct sockaddr *dst_addr = (struct sockaddr *)&_dst_addr;
/* The destination port is always forced to 5445. */
575 set_sockaddr_port(dst_addr, 5445);
577 req = tevent_req_create(mem_ctx, &state,
578 struct smb_direct_connection_rdma_connect_state);
/* Make sure the fd event is removed when the request state goes away. */
584 talloc_set_destructor(state, smb_direct_connection_rdma_connect_state_destructor);
586 c->rdma.fde_channel = tevent_add_fd(ev, c,
587 c->rdma.cm_channel->fd,
589 smb_direct_connection_rdma_connect_handler,
591 if (tevent_req_nomem(c->rdma.fde_channel, req)) {
592 return tevent_req_post(req, ev);
/* First step of the connect sequence. */
596 ret = rdma_resolve_addr(c->rdma.cm_id,
600 NTSTATUS status = map_nt_error_from_unix_common(errno);
601 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
602 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
603 tevent_req_nterror(req, status);
604 return tevent_req_post(req, ev);
/* The handler checks incoming events against this value. */
606 c->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
/*
 * fd handler for the CM event channel during connect. Fetches one CM
 * event and advances the sequence
 *   ADDR_RESOLVED -> ROUTE_RESOLVED -> ESTABLISHED
 * by issuing the next call and updating c->rdma.expected_event. Failures
 * finish the request with an NTSTATUS mapped from errno; ESTABLISHED
 * finishes it successfully.
 *
 * NOTE(review): several short lines (returns, breaks, "if (ret != 0)",
 * some call arguments) are not visible in this view.
 */
611 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
612 struct tevent_fd *fde,
616 struct tevent_req *req =
617 talloc_get_type_abort(private_data,
619 struct smb_direct_connection_rdma_connect_state *state =
621 struct smb_direct_connection_rdma_connect_state);
622 struct smb_direct_connection *c = state->c;
623 struct rdma_conn_param conn_param;
624 uint8_t ird_ord_hdr[8];
625 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
/* Fetch the pending CM event. */
630 ret = rdma_get_cm_event(c->rdma.cm_channel,
633 status = map_nt_error_from_unix_common(errno);
634 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
635 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
636 tevent_req_nterror(req, status);
/* The event's status is treated as an errno value. */
641 if (c->rdma.cm_event->status != 0) {
642 errno = c->rdma.cm_event->status;
643 status = map_nt_error_from_unix_common(errno);
644 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
645 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
646 tevent_req_nterror(req, status);
/* An event other than the expected one is logged here. */
650 if (c->rdma.cm_event->event != c->rdma.expected_event) {
651 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
652 __location__, __FUNCTION__, ret, errno));
656 switch (c->rdma.cm_event->event) {
/* Address resolved: resolve the route next (timeout argument 5000). */
657 case RDMA_CM_EVENT_ADDR_RESOLVED:
659 ret = rdma_resolve_route(c->rdma.cm_id, 5000);
661 status = map_nt_error_from_unix_common(errno);
662 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
663 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
664 tevent_req_nterror(req, status);
667 c->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
/*
 * Route resolved: set up verbs resources and connect.
 * NOTE(review): smb_direct_connection_create() performs the same pd/cq/qp
 * setup; whether the setup below is compiled in (e.g. under a
 * preprocessor guard) cannot be told from this view - confirm.
 */
669 case RDMA_CM_EVENT_ROUTE_RESOLVED:
673 c->ibv.pd = ibv_alloc_pd(c->rdma.cm_id->verbs);
674 if (c->ibv.pd == NULL) {
675 status = map_nt_error_from_unix_common(errno);
676 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
677 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
678 tevent_req_nterror(req, status);
682 c->ibv.comp_channel = ibv_create_comp_channel(c->rdma.cm_id->verbs);
683 if (c->ibv.comp_channel == NULL) {
684 status = map_nt_error_from_unix_common(errno);
685 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
686 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
687 tevent_req_nterror(req, status);
691 set_blocking(c->ibv.comp_channel->fd, false);
692 smb_set_close_on_exec(c->ibv.comp_channel->fd);
694 ZERO_STRUCT(init_attr);
695 init_attr.cap.max_send_wr = 2;
696 init_attr.cap.max_recv_wr = 2;
697 init_attr.cap.max_recv_sge = 2;
698 init_attr.cap.max_send_sge = 2;
699 init_attr.qp_type = IBV_QPT_RC;
700 init_attr.sq_sig_all = 1;
702 c->ibv.send_cq = ibv_create_cq(c->rdma.cm_id->verbs,
703 init_attr.cap.max_send_wr,
707 if (c->ibv.send_cq == NULL) {
708 status = map_nt_error_from_unix_common(errno);
709 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
710 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
711 tevent_req_nterror(req, status);
714 init_attr.send_cq = c->ibv.send_cq;
715 c->ibv.recv_cq = ibv_create_cq(c->rdma.cm_id->verbs,
716 init_attr.cap.max_recv_wr,
720 if (c->ibv.recv_cq == NULL) {
721 status = map_nt_error_from_unix_common(errno);
722 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
723 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
724 tevent_req_nterror(req, status);
727 init_attr.recv_cq = c->ibv.recv_cq;
730 ret = ibv_req_notify_cq(c->ibv.send_cq, 0);
732 status = map_nt_error_from_unix_common(errno);
733 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
734 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
735 tevent_req_nterror(req, status);
740 ret = ibv_req_notify_cq(c->ibv.recv_cq, 0);
742 status = map_nt_error_from_unix_common(errno);
743 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
744 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
745 tevent_req_nterror(req, status);
750 ret = rdma_create_qp(c->rdma.cm_id, c->ibv.pd,
753 status = map_nt_error_from_unix_common(errno);
754 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
755 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
756 tevent_req_nterror(req, status);
759 c->ibv.qp = c->rdma.cm_id->qp;
/* 8 bytes of connect private data: 16 and 0 via RSIVAL. */
761 RSIVAL(ird_ord_hdr, 0, 16);
762 RSIVAL(ird_ord_hdr, 4, 0);
764 ZERO_STRUCT(conn_param);
765 conn_param.private_data = ird_ord_hdr;
766 conn_param.private_data_len = sizeof(ird_ord_hdr);
767 conn_param.responder_resources = 1;
768 conn_param.initiator_depth = 1;
769 conn_param.retry_count = 10;
772 ret = rdma_connect(c->rdma.cm_id, &conn_param);
774 status = map_nt_error_from_unix_common(errno);
775 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
776 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
777 tevent_req_nterror(req, status);
780 c->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
/*
 * Connected: stop watching the CM channel from this request, acknowledge
 * the event and complete the request.
 */
783 case RDMA_CM_EVENT_ESTABLISHED:
786 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
787 __location__, __FUNCTION__, ret, errno));
789 c->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED;
790 TALLOC_FREE(c->rdma.fde_channel);
791 rdma_ack_cm_event(c->rdma.cm_event);
792 c->rdma.cm_event = NULL;
793 tevent_req_done(req);
/* Every other event type fails the connect request. */
796 case RDMA_CM_EVENT_ADDR_ERROR:
797 case RDMA_CM_EVENT_ROUTE_ERROR:
798 case RDMA_CM_EVENT_CONNECT_REQUEST:
799 case RDMA_CM_EVENT_CONNECT_RESPONSE:
800 case RDMA_CM_EVENT_CONNECT_ERROR:
801 case RDMA_CM_EVENT_UNREACHABLE:
802 case RDMA_CM_EVENT_REJECTED:
803 case RDMA_CM_EVENT_DISCONNECTED:
804 case RDMA_CM_EVENT_DEVICE_REMOVAL:
805 case RDMA_CM_EVENT_MULTICAST_JOIN:
806 case RDMA_CM_EVENT_MULTICAST_ERROR:
807 case RDMA_CM_EVENT_ADDR_CHANGE:
808 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
809 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
810 DEBUG(0,("%s:%s: event[%d] ret[%d] errno[%d] status[%s]\n",
811 __location__, __FUNCTION__,
812 c->rdma.cm_event->event, ret, errno, nt_errstr(status)));
813 tevent_req_nterror(req, status);
/* Acknowledge the event fetched at the top and forget it. */
817 rdma_ack_cm_event(c->rdma.cm_event);
818 c->rdma.cm_event = NULL;
/* Collect the result of the RDMA connect request as an NTSTATUS. */
821 static NTSTATUS smb_direct_connection_rdma_connect_recv(struct tevent_req *req)
823 return tevent_req_simple_recv_ntstatus(req);
/*
 * Request state of the negotiate step.
 *
 * NOTE(review): the nested struct openers/closers and member names are
 * not visible in this view; three groups of sge/work-request members and
 * two buffers (0x14 and 512 bytes) can be seen. The visible negotiate
 * code uses smb_direct_io buffers instead of these members.
 */
826 struct smb_direct_connection_negotiate_connect_state {
827 struct smb_direct_connection *c;
830 struct ibv_sge sge[1];
831 struct ibv_send_wr wr;
834 uint8_t buffer[0x14];
836 struct ibv_sge sge[1];
837 struct ibv_send_wr wr;
840 uint8_t buffer[512];//0x20];
842 struct ibv_sge sge[1];
843 struct ibv_recv_wr wr;
/*
 * Destructor of the negotiate request state: removes both channel fd
 * events so neither negotiate handler can fire after the request is gone.
 */
848 static int smb_direct_connection_negotiate_connect_destructor(
849 struct smb_direct_connection_negotiate_connect_state *state)
851 struct smb_direct_connection *c = state->c;
853 TALLOC_FREE(c->ibv.fde_channel);
854 TALLOC_FREE(c->rdma.fde_channel);
856 // if (state->req.mr != NULL) {
857 // ibv_dereg_mr(state->req.mr);
858 // state->req.mr = NULL;
/* Forward declarations of the two fd handlers registered below. */
864 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
865 struct tevent_fd *fde,
868 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
869 struct tevent_fd *fde,
/*
 * Start the asynchronous negotiate step on an established connection:
 * watch the CM and completion channels, post one receive for the
 * response, then post a zero-length RDMA read chained with the 0x14 byte
 * negotiate request. The response is processed in
 * smb_direct_connection_negotiate_connect_ibv_handler().
 */
873 static struct tevent_req *smb_direct_connection_negotiate_connect_send(TALLOC_CTX *mem_ctx,
874 struct tevent_context *ev,
875 struct smb_direct_connection *c)
877 struct tevent_req *req;
878 struct smb_direct_connection_negotiate_connect_state *state;
879 struct smb_direct_io *rdma_read = NULL;
880 struct smb_direct_io *neg_send = NULL;
881 struct smb_direct_io *neg_recv = NULL;
882 struct ibv_recv_wr *bad_recv_wr = NULL;
883 struct ibv_send_wr *bad_send_wr = NULL;
887 req = tevent_req_create(mem_ctx, &state,
888 struct smb_direct_connection_negotiate_connect_state);
/* Make sure both fd events are removed when the request state goes away. */
895 talloc_set_destructor(state, smb_direct_connection_negotiate_connect_destructor);
897 c->rdma.fde_channel = tevent_add_fd(ev, c, c->rdma.cm_channel->fd,
899 smb_direct_connection_negotiate_connect_rdma_handler,
901 if (tevent_req_nomem(c->rdma.fde_channel, req)) {
902 return tevent_req_post(req, ev);
904 c->ibv.fde_channel = tevent_add_fd(ev, c, c->ibv.comp_channel->fd,
906 smb_direct_connection_negotiate_connect_ibv_handler,
908 if (tevent_req_nomem(c->ibv.fde_channel, req)) {
909 return tevent_req_post(req, ev);
/* Receive buffer for the response: a single sge covering io->data. */
912 neg_recv = smb_direct_io_create(c);
913 if (tevent_req_nomem(neg_recv, req)) {
914 return tevent_req_post(req, ev);
916 neg_recv->sge[0].addr = (uint64_t) (uintptr_t) neg_recv->data;
917 neg_recv->sge[0].length = sizeof(neg_recv->data);
918 neg_recv->sge[0].lkey = neg_recv->data_mr->lkey;
919 neg_recv->recv_wr.sg_list = neg_recv->sge;
920 neg_recv->recv_wr.num_sge = 1;
/*
 * Zero-length RDMA read with placeholder address and key values (all 1).
 * NOTE(review): the reason for sending it is not stated in the code -
 * confirm.
 */
922 rdma_read = smb_direct_io_create(c);
923 if (tevent_req_nomem(rdma_read, req)) {
924 return tevent_req_post(req, ev);
926 rdma_read->sge[0].addr = 1;
927 rdma_read->sge[0].length = 0;
928 rdma_read->sge[0].lkey = 1;
929 rdma_read->send_wr.opcode = IBV_WR_RDMA_READ;
930 rdma_read->send_wr.send_flags = IBV_SEND_SIGNALED;
931 rdma_read->send_wr.sg_list = rdma_read->sge;
932 rdma_read->send_wr.num_sge = 1;
933 rdma_read->send_wr.wr.rdma.rkey = 1;
934 rdma_read->send_wr.wr.rdma.remote_addr = 1;
/*
 * Negotiate request, 0x14 bytes built in io->data: 0x0100 at 0x00 and
 * 0x02, zero at 0x04, then send_credit_target, max_send_size,
 * max_receive_size and max_fragmented_size.
 */
936 neg_send = smb_direct_io_create(c);
937 if (tevent_req_nomem(neg_send, req)) {
938 return tevent_req_post(req, ev);
940 SSVAL(neg_send->data, 0x00, 0x0100);
941 SSVAL(neg_send->data, 0x02, 0x0100);
942 SSVAL(neg_send->data, 0x04, 0x0000);
943 SSVAL(neg_send->data, 0x06, c->state.send_credit_target);
944 SIVAL(neg_send->data, 0x08, c->state.max_send_size);
945 SIVAL(neg_send->data, 0x0C, c->state.max_receive_size);
946 SIVAL(neg_send->data, 0x10, c->state.max_fragmented_size);
947 neg_send->sge[0].addr = (uint64_t) (uintptr_t) neg_send->data;
948 neg_send->sge[0].length = 0x14;
949 neg_send->sge[0].lkey = neg_send->data_mr->lkey;
950 neg_send->send_wr.opcode = IBV_WR_SEND;
951 neg_send->send_wr.send_flags = IBV_SEND_SIGNALED;
952 neg_send->send_wr.sg_list = neg_send->sge;
953 neg_send->send_wr.num_sge = 1;
/* Post the receive first so the response cannot arrive unposted. */
956 ret = ibv_post_recv(c->ibv.qp, &neg_recv->recv_wr, &bad_recv_wr);
958 status = map_nt_error_from_unix_common(errno);
959 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
960 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
961 tevent_req_nterror(req, status);
962 return tevent_req_post(req, ev);
/* RDMA read and negotiate request go out as one chain, read first. */
965 rdma_read->send_wr.next = &neg_send->send_wr;
967 ret = ibv_post_send(c->ibv.qp, &rdma_read->send_wr, &bad_send_wr);
969 status = map_nt_error_from_unix_common(errno);
970 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
971 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
972 tevent_req_nterror(req, status);
973 return tevent_req_post(req, ev);
976 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
977 __location__, __FUNCTION__, ret, errno));
/*
 * fd handler for the CM event channel while negotiating. No CM event is
 * welcome at this stage: DISCONNECTED fails the request with
 * NT_STATUS_CONNECTION_DISCONNECTED, every other listed event with
 * NT_STATUS_INVALID_NETWORK_RESPONSE.
 *
 * NOTE(review): returns/breaks and the acknowledgement of the event are
 * not visible in this view.
 */
981 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
982 struct tevent_fd *fde,
986 struct tevent_req *req =
987 talloc_get_type_abort(private_data,
989 struct smb_direct_connection_negotiate_connect_state *state =
991 struct smb_direct_connection_negotiate_connect_state);
992 struct smb_direct_connection *c = state->c;
993 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
/* Fetch the pending CM event. */
998 ret = rdma_get_cm_event(c->rdma.cm_channel,
1001 status = map_nt_error_from_unix_common(errno);
1002 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1003 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1004 tevent_req_nterror(req, status);
/* The event's status is treated as an errno value. */
1008 if (c->rdma.cm_event->status != 0) {
1009 errno = c->rdma.cm_event->status;
1010 status = map_nt_error_from_unix_common(errno);
1011 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1012 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1013 tevent_req_nterror(req, status);
1017 switch (c->rdma.cm_event->event) {
1018 case RDMA_CM_EVENT_DISCONNECTED:
1019 status = NT_STATUS_CONNECTION_DISCONNECTED;
1020 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1021 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1022 tevent_req_nterror(req, status);
1024 case RDMA_CM_EVENT_ADDR_RESOLVED:
1025 case RDMA_CM_EVENT_ADDR_ERROR:
1026 case RDMA_CM_EVENT_ROUTE_RESOLVED:
1027 case RDMA_CM_EVENT_ESTABLISHED:
1028 case RDMA_CM_EVENT_ROUTE_ERROR:
1029 case RDMA_CM_EVENT_CONNECT_REQUEST:
1030 case RDMA_CM_EVENT_CONNECT_RESPONSE:
1031 case RDMA_CM_EVENT_CONNECT_ERROR:
1032 case RDMA_CM_EVENT_UNREACHABLE:
1033 case RDMA_CM_EVENT_REJECTED:
1034 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1035 case RDMA_CM_EVENT_MULTICAST_JOIN:
1036 case RDMA_CM_EVENT_MULTICAST_ERROR:
1037 case RDMA_CM_EVENT_ADDR_CHANGE:
1038 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1039 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1040 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1041 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1042 tevent_req_nterror(req, status);
/* Reached when no case above handled the event. */
1046 status = NT_STATUS_INTERNAL_ERROR;
1047 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1048 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1049 tevent_req_nterror(req, status);
/*
 * fd handler for the completion channel while negotiating. Fetches and
 * acknowledges one completion event, re-arms the cq, polls one work
 * completion and dispatches on its opcode. A receive completion carries
 * the negotiate response: it is validated, the connection limits and
 * credits are updated from it, the idle receive buffers are posted and
 * the request is completed.
 *
 * NOTE(review): returns/breaks, some case labels and declarations (wc,
 * tmp, ret) are not visible in this view.
 */
1052 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
1053 struct tevent_fd *fde,
1057 struct tevent_req *req =
1058 talloc_get_type_abort(private_data,
1060 struct smb_direct_connection_negotiate_connect_state *state =
1061 tevent_req_data(req,
1062 struct smb_direct_connection_negotiate_connect_state);
1063 struct smb_direct_connection *c = state->c;
1064 struct ibv_cq *cq = NULL;
1065 void *cq_context = NULL;
1066 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1069 uint16_t credits_requested;
1070 uint16_t credits_granted;
1071 uint32_t max_read_write_size;
1072 uint32_t preferred_send_size;
1073 uint32_t max_receive_size;
1074 uint32_t max_fragmented_size;
1076 struct smb_direct_io *io = NULL;
/* Find out which cq signalled. */
1079 ret = ibv_get_cq_event(c->ibv.comp_channel,
1082 status = map_nt_error_from_unix_common(errno);
1083 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1084 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1085 tevent_req_nterror(req, status);
1089 ibv_ack_cq_events(cq, 1);
/* Both cqs were created with the connection as context. */
1091 if (cq_context != c) {
1092 status = NT_STATUS_INTERNAL_ERROR;;
1093 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1094 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1095 tevent_req_nterror(req, status);
/* Re-arm notification before polling. */
1100 ret = ibv_req_notify_cq(cq, 0);
1102 status = map_nt_error_from_unix_common(errno);
1103 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1104 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1105 tevent_req_nterror(req, status);
/* Only one work completion is polled per invocation. */
1111 ret = ibv_poll_cq(cq, 1, &wc);
1113 status = map_nt_error_from_unix_common(errno);
1114 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1115 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1116 tevent_req_nterror(req, status);
/*
 * Flushed work request: drop both fd events and let the CM handler read
 * the pending CM event to determine the failure.
 */
1121 if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1122 //errno = wc.status;
1123 status = map_nt_error_from_unix_common(wc.status);//errno);
1124 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1125 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1126 TALLOC_FREE(c->ibv.fde_channel);
1127 TALLOC_FREE(c->rdma.fde_channel);
1128 smb_direct_connection_negotiate_connect_rdma_handler(ev, fde, flags, private_data);
1131 if (wc.status != IBV_WC_SUCCESS) {
1133 status = map_nt_error_from_unix_common(errno);
1134 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1135 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1136 ibv_wc_status_str(wc.status)));
1137 tevent_req_nterror(req, status);
/* wr_id was set to the io pointer in smb_direct_io_create(). */
1141 io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1142 struct smb_direct_io);
1144 switch (wc.opcode) {
/* Send completion (case label not visible in this view): only logged. */
1146 DEBUG(0,("%s:%s: GOT SEND[%p] ret[%d] errno[%d]\n",
1147 __location__, __FUNCTION__, io, ret, errno));
1150 case IBV_WC_RDMA_READ:
1151 DEBUG(0,("%s:%s: GOT RDMA_READ[%p] ret[%d] errno[%d]\n",
1152 __location__, __FUNCTION__, io, ret, errno));
/* Receive completion (case label not visible): the negotiate response. */
1156 DEBUG(0,("%s:%s: GOT RECV[%p] ret[%d] errno[%d]\n",
1157 __location__, __FUNCTION__, io, ret, errno));
1158 dump_data(0, io->data, wc.byte_len);
/* All fields read below lie within the first 0x20 bytes. */
1159 if (wc.byte_len < 0x20) {
1160 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1161 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1162 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1163 tevent_req_nterror(req, status);
/*
 * The 16-bit values at 0x00, 0x02 and 0x04 must each be 0x0100
 * (presumably min/max/negotiated version per MS-SMBD - confirm).
 */
1166 if (SVAL(io->data, 0x00) != 0x0100) {
1167 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1168 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1169 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1170 tevent_req_nterror(req, status);
1173 if (SVAL(io->data, 0x02) != 0x0100) {
1174 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1175 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1176 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1177 tevent_req_nterror(req, status);
1180 if (SVAL(io->data, 0x04) != 0x0100) {
1181 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1182 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1183 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1184 tevent_req_nterror(req, status);
/* Zero requested or granted credits are rejected. */
1187 credits_requested = SVAL(io->data, 0x08);
1188 if (credits_requested == 0) {
1189 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1190 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1191 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1192 tevent_req_nterror(req, status);
1195 credits_granted = SVAL(io->data, 0x0A);
1196 if (credits_granted == 0) {
1197 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1198 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1199 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1200 tevent_req_nterror(req, status);
/* The peer's own status is passed through on failure. */
1203 status = NT_STATUS(IVAL(io->data, 0x0C));
1204 if (!NT_STATUS_IS_OK(status)) {
1205 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1206 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1207 tevent_req_nterror(req, status);
1210 max_read_write_size = IVAL(io->data, 0x10);
/* The peer must not want to send more than we offered to receive. */
1211 preferred_send_size = IVAL(io->data, 0x14);
1212 if (preferred_send_size > c->state.max_receive_size) {
1213 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1214 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1215 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1216 tevent_req_nterror(req, status);
/* Lower bounds enforced here: 0x80 and 0x20000. */
1219 max_receive_size = IVAL(io->data, 0x18);
1220 if (max_receive_size < 0x80) {
1221 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1222 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1223 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1224 tevent_req_nterror(req, status);
1227 max_fragmented_size = IVAL(io->data, 0x1C);
1228 if (max_fragmented_size < 0x20000) {
1229 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1230 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1231 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1232 tevent_req_nterror(req, status);
/* Response accepted: take over the negotiated values. */
1236 c->state.receive_credit_target = credits_requested;
/* Receive size: min(ours, peer's preferred send size), at least 128. */
1238 tmp = c->state.max_receive_size;
1239 tmp = MIN(tmp, preferred_send_size);
1240 tmp = MAX(tmp, 128);
1241 c->state.max_receive_size = tmp;
/* Send size: min(ours, peer's max receive size). */
1243 tmp = c->state.max_send_size;
1244 tmp = MIN(tmp, max_receive_size);
1245 c->state.max_send_size = tmp;
/* Read/write size is capped at 1048576. */
1247 tmp = MIN(1048576, max_read_write_size);
1248 c->state.max_read_write_size = tmp;
1250 tmp = c->state.max_fragmented_size;
1251 tmp = MIN(tmp, max_fragmented_size);
1252 c->state.max_fragmented_size = tmp;
1254 c->state.send_credits = credits_granted;
/* Negotiation is over: this request no longer watches the channels. */
1256 TALLOC_FREE(c->ibv.fde_channel);
1257 TALLOC_FREE(c->rdma.fde_channel);
1259 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1260 __location__, __FUNCTION__, ret, errno));
/* Post the idle receive buffers before completing the request. */
1265 ret = smb_direct_connection_post_recv(c);
1267 status = map_nt_error_from_unix_common(errno);
1268 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1269 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1270 tevent_req_nterror(req, status);
1274 tevent_req_done(req);
/* Completions of other opcodes fail the request. */
1277 case IBV_WC_RDMA_WRITE:
1279 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1280 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1281 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1282 tevent_req_nterror(req, status);
/*
 * Receive function of the SMB-Direct negotiate request.
 *
 * Hands req to tevent_req_simple_recv_ntstatus() and returns the
 * NTSTATUS it reports.
 */
1287 static NTSTATUS smb_direct_connection_negotiate_connect_recv(struct tevent_req *req)
1289 return tevent_req_simple_recv_ntstatus(req);
/*
 * State of a request created by smb_direct_connection_connect_send().
 *
 * ev and c are read back by the completion callbacks (they are passed
 * on to the negotiate sub-request) and c is read by
 * smb_direct_connection_connect_recv() to hand out the socket fd.
 */
1292 struct smb_direct_connection_connect_state {
1293 struct tevent_context *ev;
1294 struct smb_direct_connection *c;
/*
 * Completion callbacks of the two sub-requests of the connect request:
 * first the RDMA level connect, then the SMB-Direct negotiate.
 */
1297 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq);
1298 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq);
/*
 * Start an asynchronous SMB-Direct connect on connection c.
 *
 * mem_ctx: talloc parent of the new request.
 * ev:      tevent context the request is driven by.
 * c:       the connection to establish.
 * src/dst: socket addresses handed unchanged to
 *          smb_direct_connection_rdma_connect_send().
 *
 * The request first runs the RDMA connect sub-request;
 * smb_direct_connection_connect_done_rdma() continues from there.
 * If the sub-request cannot be allocated the request is failed and
 * returned through tevent_req_post().
 *
 * The result is collected with smb_direct_connection_connect_recv().
 */
1300 struct tevent_req *smb_direct_connection_connect_send(TALLOC_CTX *mem_ctx,
1301 struct tevent_context *ev,
1302 struct smb_direct_connection *c,
1303 const struct sockaddr_storage *src,
1304 const struct sockaddr_storage *dst)
1306 struct tevent_req *req = NULL;
1307 struct smb_direct_connection_connect_state *state = NULL;
1308 struct tevent_req *subreq = NULL;
/* Allocate the request together with its private state. */
1310 req = tevent_req_create(mem_ctx, &state,
1311 struct smb_direct_connection_connect_state);
/*
 * Step 1: RDMA level connect.
 * NOTE(review): the meaning of the two trailing NULL arguments is
 * defined by smb_direct_connection_rdma_connect_send() - confirm there.
 */
1318 subreq = smb_direct_connection_rdma_connect_send(state, ev, c, src, dst, NULL, NULL);
1319 if (tevent_req_nomem(subreq, req)) {
1320 return tevent_req_post(req, ev);
1322 tevent_req_set_callback(subreq,
1323 smb_direct_connection_connect_done_rdma,
/*
 * Callback for the RDMA connect sub-request.
 *
 * Collects the sub-request status; an error is propagated to the
 * parent request via tevent_req_nterror().  On success the SMB-Direct
 * negotiate sub-request is started on the same tevent context and
 * connection, with smb_direct_connection_connect_done_negotiate() as
 * its callback.
 */
1329 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq)
1331 struct tevent_req *req =
1332 tevent_req_callback_data(subreq,
1334 struct smb_direct_connection_connect_state *state =
1335 tevent_req_data(req,
1336 struct smb_direct_connection_connect_state);
/* The sub-request is finished; take its result and release it. */
1339 status = smb_direct_connection_rdma_connect_recv(subreq);
1340 TALLOC_FREE(subreq);
1341 if (tevent_req_nterror(req, status)) {
/* Step 2: SMB-Direct negotiate on the now connected RDMA channel. */
1345 subreq = smb_direct_connection_negotiate_connect_send(state, state->ev, state->c);
1346 if (tevent_req_nomem(subreq, req)) {
1349 tevent_req_set_callback(subreq,
1350 smb_direct_connection_connect_done_negotiate,
/*
 * Callback for the SMB-Direct negotiate sub-request.
 *
 * Collects the negotiate status and releases the sub-request.  An
 * error fails the parent request via tevent_req_nterror(); otherwise
 * the parent request is completed with tevent_req_done().
 */
1354 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq)
1356 struct tevent_req *req =
1357 tevent_req_callback_data(subreq,
1361 status = smb_direct_connection_negotiate_connect_recv(subreq);
1362 TALLOC_FREE(subreq);
1363 if (tevent_req_nterror(req, status)) {
1367 tevent_req_done(req);
/*
 * Collect the result of smb_direct_connection_connect_send().
 *
 * req: the finished connect request.
 * fd:  on success receives c->sock.tmp_fd, the socket end meant for
 *      the caller.
 *
 * If the request failed, tevent_req_received() is called on the error
 * path; otherwise the fd is handed out and NT_STATUS_OK is returned.
 *
 * NOTE(review): *fd is written unconditionally on success in the
 * lines shown here - callers presumably must pass a non-NULL fd;
 * confirm.
 */
1370 NTSTATUS smb_direct_connection_connect_recv(struct tevent_req *req, int *fd)
1372 struct smb_direct_connection_connect_state *state =
1373 tevent_req_data(req,
1374 struct smb_direct_connection_connect_state);
1375 struct smb_direct_connection *c = state->c;
1380 if (tevent_req_is_nterror(req, &status)) {
1381 tevent_req_received(req);
/*
 * Hand the fd to the caller and forget it in the connection
 * (tmp_fd is reset to -1), so it is only given out once.
 */
1385 *fd = c->sock.tmp_fd;
1386 c->sock.tmp_fd = -1;
1387 tevent_req_received(req);
1388 return NT_STATUS_OK;
/*
 * Tear down connection c after a failure.
 *
 * A status of NT_STATUS_OK is replaced by
 * NT_STATUS_UNEXPECTED_NETWORK_ERROR, then
 * smb_direct_connection_destructor(c) is called.
 *
 * NOTE(review): in the statements shown here the (possibly remapped)
 * status is not passed on to anything - confirm whether it is meant
 * to be stored or reported.
 */
1391 static void smb_direct_connection_disconnect(struct smb_direct_connection *c,
1394 if (NT_STATUS_IS_OK(status)) {
1395 status = NT_STATUS_UNEXPECTED_NETWORK_ERROR;
1398 smb_direct_connection_destructor(c);
/*
 * tevent fd handler for the RDMA CM event channel
 * (c->rdma.cm_channel).
 *
 * private_data is the struct smb_direct_connection.  One CM event is
 * fetched with rdma_get_cm_event(); every outcome handled here ends
 * in smb_direct_connection_disconnect():
 *  - rdma_get_cm_event() failure: status mapped from errno;
 *  - event with non-zero status: status mapped from that value;
 *  - RDMA_CM_EVENT_DISCONNECTED: NT_STATUS_CONNECTION_DISCONNECTED;
 *  - the other listed event types: NT_STATUS_INVALID_NETWORK_RESPONSE;
 *  - anything else: NT_STATUS_INTERNAL_ERROR.
 *
 * NOTE(review): no rdma_ack_cm_event() call appears among these
 * statements - presumably the event is released when the connection
 * is destroyed; confirm.
 */
1401 static void smb_direct_connection_rdma_handler(struct tevent_context *ev,
1402 struct tevent_fd *fde,
1406 struct smb_direct_connection *c =
1407 talloc_get_type_abort(private_data,
1408 struct smb_direct_connection);
1409 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
/* Fetch the pending connection manager event. */
1414 ret = rdma_get_cm_event(c->rdma.cm_channel,
1417 status = map_nt_error_from_unix_common(errno);
1418 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1419 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1420 smb_direct_connection_disconnect(c, status);
/*
 * NOTE(review): the event status is copied into errno as is and then
 * mapped like a unix error - presumably it is an errno style value;
 * confirm the sign convention against rdma_get_cm_event(3).
 */
1424 if (c->rdma.cm_event->status != 0) {
1425 errno = c->rdma.cm_event->status;
1426 status = map_nt_error_from_unix_common(errno);
1427 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1428 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1429 smb_direct_connection_disconnect(c, status);
/* At this stage no CM event is expected, so each one ends the connection. */
1433 switch (c->rdma.cm_event->event) {
1434 case RDMA_CM_EVENT_DISCONNECTED:
1435 status = NT_STATUS_CONNECTION_DISCONNECTED;
1436 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1437 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1438 smb_direct_connection_disconnect(c, status);
/* All of these share one treatment: unexpected event from the peer/stack. */
1440 case RDMA_CM_EVENT_ADDR_RESOLVED:
1441 case RDMA_CM_EVENT_ADDR_ERROR:
1442 case RDMA_CM_EVENT_ROUTE_RESOLVED:
1443 case RDMA_CM_EVENT_ESTABLISHED:
1444 case RDMA_CM_EVENT_ROUTE_ERROR:
1445 case RDMA_CM_EVENT_CONNECT_REQUEST:
1446 case RDMA_CM_EVENT_CONNECT_RESPONSE:
1447 case RDMA_CM_EVENT_CONNECT_ERROR:
1448 case RDMA_CM_EVENT_UNREACHABLE:
1449 case RDMA_CM_EVENT_REJECTED:
1450 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1451 case RDMA_CM_EVENT_MULTICAST_JOIN:
1452 case RDMA_CM_EVENT_MULTICAST_ERROR:
1453 case RDMA_CM_EVENT_ADDR_CHANGE:
1454 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1455 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1456 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1457 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1458 smb_direct_connection_disconnect(c, status);
/* Event types not listed above. */
1462 status = NT_STATUS_INTERNAL_ERROR;
1463 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1464 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1465 smb_direct_connection_disconnect(c, status);
/*
 * tevent fd handler for the ibverbs completion channel
 * (c->ibv.comp_channel).
 *
 * private_data is the struct smb_direct_connection.  The handler
 * fetches and acknowledges one CQ event, checks that the CQ context
 * is this connection, re-arms the CQ notification and polls with a
 * batch size of one work completion.  wc.wr_id is interpreted as the
 * struct smb_direct_io the work request was posted for and wc.opcode
 * selects the processing:
 *  - send completion (logs "GOT SEND"): the io moves from s2r.posted
 *    back to s2r.idle and smb_direct_connection_setup_readv() runs;
 *  - receive completion (logs "GOT RECV"): the SMB-Direct data
 *    transfer header is validated, the credit counters are updated
 *    and the io is either recycled (message without payload) or
 *    queued on r2s.ready followed by
 *    smb_direct_connection_setup_writev();
 *  - IBV_WC_RDMA_READ / IBV_WC_RDMA_WRITE: rejected with
 *    NT_STATUS_INVALID_NETWORK_RESPONSE.
 * Every failure is logged and ends in
 * smb_direct_connection_disconnect().
 */
1468 static void smb_direct_connection_ibv_handler(struct tevent_context *ev,
1469 struct tevent_fd *fde,
1473 struct smb_direct_connection *c =
1474 talloc_get_type_abort(private_data,
1475 struct smb_direct_connection);
1476 struct ibv_cq *cq = NULL;
1477 void *cq_context = NULL;
1478 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1481 uint16_t credits_requested;
1482 uint16_t credits_granted;
1484 uint32_t data_offset;
1485 struct smb_direct_io *io = NULL;
/* Fetch the CQ event that made the completion channel readable. */
1488 ret = ibv_get_cq_event(c->ibv.comp_channel,
1491 status = map_nt_error_from_unix_common(errno);
1492 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1493 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1494 smb_direct_connection_disconnect(c, status);
/* Acknowledge exactly the one event that was fetched above. */
1498 ibv_ack_cq_events(cq, 1);
/* The CQ context is expected to be this connection. */
1500 if (cq_context != c) {
1501 status = NT_STATUS_INTERNAL_ERROR;
1502 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1503 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1504 smb_direct_connection_disconnect(c, status);
/*
 * Re-arm the notification; the second argument (solicited_only) is 0,
 * i.e. any completion should raise an event - see ibv_req_notify_cq(3).
 */
1509 ret = ibv_req_notify_cq(cq, 0);
1511 status = map_nt_error_from_unix_common(errno);
1512 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1513 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1514 smb_direct_connection_disconnect(c, status);
/* Poll with a batch size of one work completion. */
1520 ret = ibv_poll_cq(cq, 1, &wc);
1522 status = map_nt_error_from_unix_common(errno);
1523 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1524 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1525 smb_direct_connection_disconnect(c, status);
/*
 * A flushed work request: both channel fd events are released and the
 * CM handler is invoked directly with flags 0 - presumably so that a
 * pending CM event (e.g. the disconnect) determines the final status;
 * confirm.
 */
1530 if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1532 status = map_nt_error_from_unix_common(errno);
1533 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1534 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1535 ibv_wc_status_str(wc.status)));
1536 TALLOC_FREE(c->ibv.fde_channel);
1537 TALLOC_FREE(c->rdma.fde_channel);
1538 smb_direct_connection_rdma_handler(ev, fde, 0 /* flags */, private_data);
/* Any other unsuccessful completion ends the connection. */
1541 if (wc.status != IBV_WC_SUCCESS) {
1543 status = map_nt_error_from_unix_common(errno);
1544 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1545 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1546 ibv_wc_status_str(wc.status)));
1547 smb_direct_connection_disconnect(c, status);
/* wr_id carries the pointer of the smb_direct_io that was posted. */
1551 io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1552 struct smb_direct_io);
1554 switch (wc.opcode) {
/*
 * Send completion: the buffer is free again (posted -> idle), so more
 * data can be read from the socket side.
 */
1556 DEBUG(0,("%s:%s: GOT SEND[%p] ret[%d] errno[%d]\n",
1557 __location__, __FUNCTION__, io, ret, errno));
1558 DLIST_REMOVE(c->s2r.posted, io);
1559 DLIST_ADD_END(c->s2r.idle, io);
1562 ret = smb_direct_connection_setup_readv(c);
1564 status = map_nt_error_from_unix_common(errno);
1565 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1566 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1567 smb_direct_connection_disconnect(c, status);
/* Receive completion: validate and dispatch the received message. */
1573 DEBUG(0,("%s:%s: GOT RECV[%p] ret[%d] errno[%d]\n",
1574 __location__, __FUNCTION__, io, ret, errno));
/*
 * NOTE(review): ">=" also rejects a message of exactly
 * max_receive_size bytes - confirm whether ">" is intended.
 */
1575 if (wc.byte_len >= c->state.max_receive_size) {
1576 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1577 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1578 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1579 smb_direct_connection_disconnect(c, status);
/* 0x14 bytes are needed for the header fields parsed below. */
1582 if (wc.byte_len < 0x14) {
1583 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1584 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1585 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1586 smb_direct_connection_disconnect(c, status);
/*
 * Header fields as parsed here:
 *   0x00 credits requested, 0x02 credits granted, 0x04 flags,
 *   0x08 remaining data length, 0x0C data offset, 0x10 data length.
 * A message requesting or granting zero credits is rejected.
 */
1589 DLIST_REMOVE(c->r2s.posted, io);
1590 dump_data(0, io->smbd_hdr, MIN(wc.byte_len, sizeof(io->smbd_hdr)));
1591 credits_requested = SVAL(io->smbd_hdr, 0x00);
1592 if (credits_requested == 0) {
1593 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1594 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1595 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1596 smb_direct_connection_disconnect(c, status);
1599 credits_granted = SVAL(io->smbd_hdr, 0x02);
1600 if (credits_granted == 0) {
1601 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1602 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1603 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1604 smb_direct_connection_disconnect(c, status);
1607 flags = SVAL(io->smbd_hdr, 0x04);
1608 io->remaining_length = IVAL(io->smbd_hdr, 0x08);
1609 data_offset = IVAL(io->smbd_hdr, 0x0C);
1610 io->data_length = IVAL(io->smbd_hdr, 0x10);
/*
 * Credit accounting: this message used up one of our receive credits,
 * the peer tells us how many it wants and grants us send credits.
 * NOTE(review): both counters are 16-bit; the decrement and the
 * addition are not range checked here - confirm they cannot wrap.
 */
1612 c->state.receive_credits -= 1;
1613 c->state.receive_credit_target = credits_requested;
1614 c->state.send_credits += credits_granted;
/*
 * data_offset == 0: message without payload, it has to be exactly the
 * 0x14 byte header; the io is recycled and a new receive is posted.
 * data_offset == 0x18: payload follows, its length is checked against
 * max_receive_size.  Any other offset is rejected.
 */
1616 if (data_offset == 0) {
1617 if (wc.byte_len != 0x14) {
1618 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1619 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1620 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1621 smb_direct_connection_disconnect(c, status);
1624 DLIST_ADD_END(c->r2s.idle, io);
1626 ret = smb_direct_connection_post_recv(c);
1628 status = map_nt_error_from_unix_common(errno);
1629 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1630 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1631 smb_direct_connection_disconnect(c, status);
1635 } else if (data_offset == 0x18) {
1636 if (io->data_length >= (c->state.max_receive_size - data_offset)) {
1637 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1638 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1639 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1640 smb_direct_connection_disconnect(c, status);
1644 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1645 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1646 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1647 smb_direct_connection_disconnect(c, status);
/*
 * Continuation of a fragmented message: the fragment must fit into
 * what is still outstanding and the peer's remaining length must
 * match ours afterwards.  Only the payload is forwarded to the socket.
 */
1651 if (c->r2s.remaining_length > 0) {
1652 if (io->data_length > c->r2s.remaining_length) {
1653 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1654 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1655 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1656 smb_direct_connection_disconnect(c, status);
1660 c->r2s.remaining_length -= io->data_length;
1662 if (io->remaining_length != c->r2s.remaining_length) {
1663 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1664 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1665 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1666 smb_direct_connection_disconnect(c, status);
1670 io->iov = io->_iov_array;
1671 io->iov[0].iov_base = io->data;
1672 io->iov[0].iov_len = io->data_length;
/*
 * First fragment of a message: the total length is written into the
 * length header (nbt_hdr) which is sent to the socket in front of the
 * payload.
 * NOTE(review): if data_length and remaining_length are both 32-bit,
 * the sum is computed in 32 bits before it is widened - confirm it
 * cannot wrap.
 */
1675 uint64_t total_length = io->data_length + io->remaining_length;
1677 if (total_length >= c->state.max_fragmented_size) { //correct direction
1678 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1679 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1680 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1681 smb_direct_connection_disconnect(c, status);
1685 _smb_setlen_tcp(io->nbt_hdr, total_length);
1686 io->iov = io->_iov_array;
1687 io->iov[0].iov_base = io->nbt_hdr;
1688 io->iov[0].iov_len = sizeof(io->nbt_hdr);
1689 io->iov[1].iov_base = io->data;
1690 io->iov[1].iov_len = io->data_length;
/*
 * NOTE(review): the next three checks are protocol/credit conditions,
 * yet the status is mapped from errno, which no call directly in
 * front of them appears to set - the resulting status looks
 * arbitrary; confirm and consider a fixed NTSTATUS.
 */
1694 if (c->state.receive_credits == 0) {
1695 // TODO: send more credits
1696 status = map_nt_error_from_unix_common(errno);
1697 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1698 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1699 smb_direct_connection_disconnect(c, status);
/* Any flag bit other than 0x0001 is not accepted. */
1703 if (flags & ~0x0001) {
1704 status = map_nt_error_from_unix_common(errno);
1705 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1706 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1707 smb_direct_connection_disconnect(c, status);
/*
 * Flag 0x0001 is not handled yet either (see TODO); presumably this is
 * the "response requested" flag of MS-SMBD - confirm.
 */
1711 if (flags & 0x0001) {
1712 // TODO: send more credits
1713 status = map_nt_error_from_unix_common(errno);
1714 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1715 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1716 smb_direct_connection_disconnect(c, status);
/* Queue the io for delivery to the socket and arm the write side. */
1720 DLIST_ADD_END(c->r2s.ready, io);
1722 ret = smb_direct_connection_setup_writev(c);
1724 status = map_nt_error_from_unix_common(errno);
1725 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1726 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1727 smb_direct_connection_disconnect(c, status);
/* RDMA read/write completions are never expected on this connection. */
1732 case IBV_WC_RDMA_READ:
1733 case IBV_WC_RDMA_WRITE:
1735 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1736 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1737 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1738 smb_direct_connection_disconnect(c, status);
/*
 * tevent fd handler for the local socket (c->sock.fd / c->sock.fde).
 *
 * private_data is the struct smb_direct_connection.
 *
 * TEVENT_FD_WRITE: data received over RDMA (r2s queues) is written to
 * the socket with writev().  A fully written io goes back to r2s.idle
 * and smb_direct_connection_post_recv() is called afterwards.
 *
 * TEVENT_FD_READ: data for the peer is read from the socket with
 * readv() into an io taken from s2r.idle.  A new packet starts with
 * its length header (io->nbt_hdr); the payload is then read in chunks
 * of at most sizeof(io->data) bytes, tracked in
 * c->s2r.remaining_length.  smb_direct_connection_post_send() is
 * called afterwards.
 *
 * EAGAIN from readv()/writev() keeps the fd event armed; any other
 * failure is logged and ends in smb_direct_connection_disconnect().
 */
1743 static void smb_direct_connection_sock_handler(struct tevent_context *ev,
1744 struct tevent_fd *fde,
1748 struct smb_direct_connection *c =
1749 talloc_get_type_abort(private_data,
1750 struct smb_direct_connection);
1751 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1752 struct smb_direct_io *io = NULL;
/* ---- RDMA -> socket direction ---- */
1757 if (fde_flags & TEVENT_FD_WRITE) {
1758 if (c->r2s.out != NULL) {
1762 DLIST_REMOVE(c->r2s.ready, io);
/* Write as much of the current io as the socket accepts. */
1769 sret = writev(c->sock.fd, io->iov, io->iov_count);
1771 if (errno == EAGAIN) { // and more...
1772 TEVENT_FD_WRITEABLE(c->sock.fde);
1776 status = map_nt_error_from_unix_common(errno);
1777 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1778 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1779 smb_direct_connection_disconnect(c, status);
/* Skip the bytes that were written; iov/iov_count describe the rest. */
1783 ok = iov_advance(&io->iov, &io->iov_count, sret);
1785 status = map_nt_error_from_unix_common(errno);
1786 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1787 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1788 smb_direct_connection_disconnect(c, status);
/* Nothing left in this io: it can be reused for another receive. */
1792 if (io->iov_count == 0) {
1794 DLIST_ADD_END(c->r2s.idle, io);
1798 TEVENT_FD_NOT_WRITEABLE(c->sock.fde);
/* Give idle buffers back to the receive queue. */
1803 ret = smb_direct_connection_post_recv(c);
1805 status = map_nt_error_from_unix_common(errno);
1806 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1807 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1808 smb_direct_connection_disconnect(c, status);
/* ---- socket -> RDMA direction ---- */
1813 if (fde_flags & TEVENT_FD_READ) {
1814 if (c->s2r.in != NULL) {
1818 DLIST_REMOVE(c->s2r.idle, io);
1821 if (c->s2r.remaining_length > 0) {
1823 * We need to continue to get
1824 * the incomplete packet.
1826 io->data_length = MIN(sizeof(io->data),
1827 c->s2r.remaining_length);
1828 io->remaining_length = c->s2r.remaining_length;
1829 io->remaining_length -= io->data_length;
1830 c->s2r.remaining_length = io->remaining_length;
1832 io->iov = io->_iov_array;
1833 io->iov[0].iov_base = io->data;
1834 io->iov[0].iov_len = io->data_length;
1838 * For a new packet we need to get the length
1841 io->data_length = 0;
1842 io->remaining_length = 0;
1844 io->iov = io->_iov_array;
1845 io->iov[0].iov_base = io->nbt_hdr;
1846 io->iov[0].iov_len = sizeof(io->nbt_hdr);
/* Read into whatever the current iov describes (header or payload). */
1853 sret = readv(c->sock.fd, io->iov, io->iov_count);
1855 if (errno == EAGAIN) { // and more...
1856 TEVENT_FD_READABLE(c->sock.fde);
1860 status = map_nt_error_from_unix_common(errno);
1861 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1862 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1863 smb_direct_connection_disconnect(c, status);
/* Presumably the end-of-file case (readv() returned 0) - confirm. */
1867 status = NT_STATUS_CONNECTION_DISCONNECTED;
1868 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1869 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1870 smb_direct_connection_disconnect(c, status);
1874 ok = iov_advance(&io->iov, &io->iov_count, sret);
1876 status = map_nt_error_from_unix_common(errno);
1877 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1878 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1879 smb_direct_connection_disconnect(c, status);
/*
 * The iov was filled completely: either a payload chunk is done
 * (data_length != 0) or the length header was read and the payload
 * read has to be set up.
 * NOTE(review): the finished chunk is queued on c->r2s.ready although
 * the io was taken from c->s2r.idle and is meant for the send queue -
 * this looks like it should be c->s2r.ready; confirm.
 */
1883 if (io->iov_count == 0) {
1884 if (io->data_length != 0) {
1886 * We managed to read the whole fragment
1887 * which is ready to be posted into the
1891 DLIST_ADD_END(c->r2s.ready, io);
/* The header holds the packet length; it must not exceed the negotiated maximum. */
1895 c->s2r.remaining_length = smb_len_tcp(io->nbt_hdr);
1896 if (c->s2r.remaining_length > c->state.max_fragmented_size) { //correct direction
1897 status = NT_STATUS_INVALID_BUFFER_SIZE;
1898 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1899 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1900 smb_direct_connection_disconnect(c, status);
1904 io->data_length = MIN(sizeof(io->data),
1905 c->s2r.remaining_length);
1906 io->remaining_length = c->s2r.remaining_length;
1907 io->remaining_length -= io->data_length;
1908 c->s2r.remaining_length = io->remaining_length;
1910 io->iov = io->_iov_array;
1911 io->iov[0].iov_base = io->data;
1912 io->iov[0].iov_len = io->data_length;
1916 * try to read the reset immediately.
1921 TEVENT_FD_NOT_READABLE(c->sock.fde);
/* Hand completed buffers to the RDMA send queue. */
1925 ret = smb_direct_connection_post_send(c);
1927 status = map_nt_error_from_unix_common(errno);
1928 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1929 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1930 smb_direct_connection_disconnect(c, status);
/*
 * (Re-)register the fd events of connection c on tevent context ev.
 *
 * Three fd events are created, all with c as talloc parent:
 *  - c->rdma.fde_channel on the CM event channel fd
 *    (smb_direct_connection_rdma_handler),
 *  - c->ibv.fde_channel on the completion channel fd
 *    (smb_direct_connection_ibv_handler),
 *  - c->sock.fde on c->sock.fd (smb_direct_connection_sock_handler).
 *
 * Returns NT_STATUS_OK on success, including the case that ev is
 * already the context in use (ev == c->last_ev);
 * NT_STATUS_NO_MEMORY if an fd event cannot be created (events
 * created earlier in this call are released again);
 * NT_STATUS_INVALID_PARAMETER_MIX for a combination of ev and
 * c->last_ev that is not supported.
 */
1935 NTSTATUS smb_direct_connection_setup_events(struct smb_direct_connection *c,
1936 struct tevent_context *ev)
/* The socket is always watched for reading; for writing only with pending output. */
1938 uint16_t sock_fde_flags = TEVENT_FD_READ;
1940 if (c->r2s.out != NULL) {
1941 sock_fde_flags |= TEVENT_FD_WRITE;
/*
 * An fd event reporting no flags is treated as unusable and all three
 * events are dropped so they get re-created below.
 * NOTE(review): presumably this also covers a NULL fd event
 * (tevent_fd_get_flags() returning 0) - confirm.
 */
1944 if (tevent_fd_get_flags(c->ibv.fde_channel) == 0) {
1946 TALLOC_FREE(c->sock.fde);
1947 TALLOC_FREE(c->ibv.fde_channel);
1948 TALLOC_FREE(c->rdma.fde_channel);
1951 if (tevent_fd_get_flags(c->rdma.fde_channel) == 0) {
1953 TALLOC_FREE(c->sock.fde);
1954 TALLOC_FREE(c->ibv.fde_channel);
1955 TALLOC_FREE(c->rdma.fde_channel);
/* Decide between tearing down, keeping or rejecting based on ev and c->last_ev. */
1960 TALLOC_FREE(c->sock.fde);
1961 TALLOC_FREE(c->ibv.fde_channel);
1962 TALLOC_FREE(c->rdma.fde_channel);
1963 } else if (ev == c->last_ev) {
1964 return NT_STATUS_OK;
1965 } else if (c->last_ev == NULL) {
1968 return NT_STATUS_INVALID_PARAMETER_MIX;
/* Create the three fd events; on failure undo the ones already created. */
1971 c->rdma.fde_channel = tevent_add_fd(ev, c,
1972 c->rdma.cm_channel->fd,
1974 smb_direct_connection_rdma_handler,
1976 if (c->rdma.fde_channel == NULL) {
1977 return NT_STATUS_NO_MEMORY;
1979 c->ibv.fde_channel = tevent_add_fd(ev, c,
1980 c->ibv.comp_channel->fd,
1982 smb_direct_connection_ibv_handler,
1984 if (c->ibv.fde_channel == NULL) {
1985 TALLOC_FREE(c->rdma.fde_channel);
1986 return NT_STATUS_NO_MEMORY;
1988 c->sock.fde = tevent_add_fd(ev, c, c->sock.fd,
1990 smb_direct_connection_sock_handler,
1992 if (c->sock.fde == NULL) {
1993 TALLOC_FREE(c->rdma.fde_channel);
1994 TALLOC_FREE(c->ibv.fde_channel);
1995 return NT_STATUS_NO_MEMORY;
1999 return NT_STATUS_OK;
2002 #endif /* SMB_TRANSPORT_ENABLE_RDMA */