2 Unix SMB/CIFS implementation.
3 Infrastructure for SMB-Direct RDMA as transport
4 Copyright (C) Stefan Metzmacher 2012,2016
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/network.h"
23 #include "lib/util/tevent_ntstatus.h"
24 #include "lib/tsocket/tsocket.h"
25 #include "lib/util/util_net.h"
26 #include "libcli/smb/smb_common.h"
27 #include "libcli/smb/smb_direct.h"
28 #include "lib/util/dlinklist.h"
29 #include "lib/util/iov_buf.h"
30 #include "librpc/ndr/libndr.h"
32 #ifdef SMB_TRANSPORT_ENABLE_RDMA
33 #include <rdma/rdma_cma_abi.h>
34 #include <rdma/rdma_cma.h>
35 #include <infiniband/verbs.h>
37 #define SMB_DIRECT_RESPONSE_REQUESTED 0x0001
/*
 * Per-connection state of the SMB-Direct transport.
 *
 * NOTE(review): the delimiters of the nested member groups are not
 * present in this listing. The group names used in the comments below
 * are taken from the way the members are accessed later in this file
 * (c->state, c->sock, c->rdma, c->ibv, c->r2s, c->s2r) - confirm
 * against the complete source.
 */
41 struct smb_direct_connection {
/*
 * c->state: sizes and credit counters. The defaults are assigned in
 * smb_direct_connection_create().
 */
44 uint32_t max_send_size;
45 uint32_t max_receive_size;
46 uint32_t max_fragmented_size;
47 uint32_t max_read_write_size;
48 uint16_t send_credit_target;
/* decremented once per message in smb_direct_connection_post_send() */
49 uint16_t send_credits;
50 uint16_t receive_credit_max;
51 uint16_t receive_credit_target;
/* increased by the amount granted in smb_direct_connection_post_send() */
52 uint16_t receive_credits;
/* set to 5 at creation; presumably a keep-alive interval, unit not visible here */
53 uint32_t keep_alive_internal;
/*
 * c->sock: the socketpair() created in smb_direct_connection_create().
 * c->sock.fd is used in this file as well, its declaration is not
 * visible in this listing.
 */
56 int tmp_fd; /* given to the caller end */
58 struct tevent_fd *fde;
/* c->rdma: connection manager id, its event channel and the pending event */
61 struct rdma_cm_id *cm_id;
62 struct rdma_event_channel *cm_channel;
63 struct tevent_fd *fde_channel;
/* the only event type the connect handler accepts next */
64 enum rdma_cm_event_type expected_event;
/* acked and reset to NULL by the handlers and by the destructor */
65 struct rdma_cm_event *cm_event;
/*
 * c->ibv: verbs objects. c->ibv.pd and c->ibv.qp are used in this file
 * as well, their declarations are not visible in this listing.
 */
69 struct ibv_comp_channel *comp_channel;
70 struct tevent_fd *fde_channel;
71 struct ibv_cq *send_cq;
72 struct ibv_cq *recv_cq;
74 struct ibv_qp_init_attr init_attr;
/* talloc parent of every smb_direct_io, see smb_direct_io_create() */
77 TALLOC_CTX *io_mem_ctx;
/* c->r2s: lists of smb_direct_io for the direction described below */
80 * here we have io coming into
81 * the rdma layer, which needs to
82 * be flushed to the socketpair
84 struct smb_direct_io *idle;
85 struct smb_direct_io *posted;
86 struct smb_direct_io *ready;
87 struct smb_direct_io *out;
88 uint32_t remaining_length;
/* c->s2r: lists of smb_direct_io for the direction described below */
92 * here we have io coming from the socketpair
93 * which needs to be flushed into the rdma layer.
95 struct smb_direct_io *idle;
96 struct smb_direct_io *posted;
97 struct smb_direct_io *ready;
98 struct smb_direct_io *in;
99 uint32_t remaining_length;
/*
 * File-scope connection pointer. No use of it is visible in this part
 * of the file.
 */
103 struct smb_direct_connection *smb_direct_conn;
/*
 * Size of the data buffer embedded in every smb_direct_io. The same
 * value is used as the default max_receive_size (and, as a literal, as
 * max_send_size) in smb_direct_connection_create().
 */
105 //#define SMB_DIRECT_IO_MAX_DATA 8192
106 #define SMB_DIRECT_IO_MAX_DATA 1364
/* size of the header that smb_direct_connection_post_send() sends when there is no payload */
107 #define SMB_DIRECT_DATA_MIN_HDR_SIZE 0x14
/*
 * Header size passed through NDR_ROUND with 8; used as the size of
 * smb_direct_io.smbd_hdr and as the data offset written into the header
 * when a payload is present.
 */
108 #define SMB_DIRECT_DATA_OFFSET NDR_ROUND(SMB_DIRECT_DATA_MIN_HDR_SIZE, 8)
/*
 * One send or receive buffer together with its verbs bookkeeping.
 * Instances are created by smb_direct_io_create() and kept on the
 * idle/posted/ready lists of the connection.
 */
110 struct smb_direct_io {
/* list linkage used by the DLIST_ macros */
111 struct smb_direct_io *prev, *next;
/* memory registrations; their lkeys are stored in sge[0] and sge[1] */
113 struct ibv_mr *hdr_mr;
114 struct ibv_mr *data_mr;
/* sge[0] describes smbd_hdr, sge[1] describes data */
115 struct ibv_sge sge[2];
/* both work requests point at the same sge array */
117 struct ibv_recv_wr recv_wr;
118 struct ibv_send_wr send_wr;
/* not referenced in the visible part of this file */
120 struct iovec _iov_array[2];
/* payload length and remaining length written into the header by post_send */
124 uint32_t data_length;
125 uint32_t remaining_length;
/* not referenced in the visible part of this file */
127 uint8_t nbt_hdr[NBT_HDR_SIZE];
/* wire header buffer, registered as hdr_mr */
128 uint8_t smbd_hdr[SMB_DIRECT_DATA_OFFSET];
/* payload buffer, registered as data_mr */
129 uint8_t data[SMB_DIRECT_IO_MAX_DATA];
/* defined below; installed as talloc destructor by smb_direct_io_create() */
132 static int smb_direct_io_destructor(struct smb_direct_io *io);
/*
 * Allocate a zeroed smb_direct_io below c->io_mem_ctx, register its
 * header and data buffers with the protection domain c->ibv.pd and
 * pre-fill the scatter/gather entries and both work requests.
 *
 * c: connection whose io_mem_ctx and protection domain are used.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * callers in this file treat a NULL result as failure.
 */
134 static struct smb_direct_io *smb_direct_io_create(struct smb_direct_connection *c)
136 struct smb_direct_io *io;
/* io_mem_ctx is only created in smb_direct_connection_complete_alloc() */
138 if (c->io_mem_ctx == NULL) {
142 io = talloc_zero(c->io_mem_ctx, struct smb_direct_io);
/* the destructor deregisters whatever memory regions were registered */
146 talloc_set_destructor(io, smb_direct_io_destructor);
148 io->hdr_mr = ibv_reg_mr(c->ibv.pd,
150 sizeof(io->smbd_hdr),
151 IBV_ACCESS_LOCAL_WRITE);
152 if (io->hdr_mr == NULL) {
157 io->data_mr = ibv_reg_mr(c->ibv.pd,
160 IBV_ACCESS_LOCAL_WRITE);
161 if (io->data_mr == NULL) {
/* default layout: sge[0] covers the whole header, sge[1] the whole data buffer */
166 io->sge[0].addr = (uint64_t) (uintptr_t) io->smbd_hdr;
167 io->sge[0].length = sizeof(io->smbd_hdr);
168 io->sge[0].lkey = io->hdr_mr->lkey;
169 io->sge[1].addr = (uint64_t) (uintptr_t) io->data;
170 io->sge[1].length = sizeof(io->data);
171 io->sge[1].lkey = io->data_mr->lkey;
/* wr_id carries the io pointer so a completion can be mapped back to it */
173 io->send_wr.wr_id = (uint64_t) (uintptr_t) io;
174 io->send_wr.opcode = IBV_WR_SEND;
175 io->send_wr.send_flags = IBV_SEND_SIGNALED;
176 io->send_wr.sg_list = io->sge;
177 io->send_wr.num_sge = ARRAY_SIZE(io->sge);
/* the receive request shares the same sge array as the send request */
179 io->recv_wr.wr_id = (uint64_t) (uintptr_t) io;
180 io->recv_wr.sg_list = io->sge;
181 io->recv_wr.num_sge = ARRAY_SIZE(io->sge);
/*
 * talloc destructor of smb_direct_io: deregister the header and data
 * memory regions if they were registered.
 *
 * NOTE(review): the return value and any reset of the pointers are not
 * visible in this listing.
 */
186 static int smb_direct_io_destructor(struct smb_direct_io *io)
188 if (io->hdr_mr != NULL) {
189 ibv_dereg_mr(io->hdr_mr);
193 if (io->data_mr != NULL) {
194 ibv_dereg_mr(io->data_mr);
/* defined below; installed by smb_direct_connection_create() */
201 static int smb_direct_connection_destructor(struct smb_direct_connection *c);
/*
 * File-local definition of DEBUG(). The visible lines only emit the
 * message when the level passes MAX_DEBUG_LEVEL and the level of the
 * current debug class, then write the header and the text.
 *
 * NOTE(review): the macro parameter is named _level while the visible
 * lines test level; the line that presumably defines level, and the end
 * of the do-block, are not visible in this listing - confirm.
 */
204 #define DEBUG( _level, body ) do {\
206 (void)( ((level) <= MAX_DEBUG_LEVEL) && \
207 unlikely(DEBUGLEVEL_CLASS[ DBGC_CLASS ] >= (level)) \
208 && (dbghdrclass( level, DBGC_CLASS, __location__, __FUNCTION__ )) \
209 && (dbgtext body) ); \
/*
 * Allocate a connection object on mem_ctx, fill in the default
 * parameters, create the local socketpair and the RDMA connection
 * manager event channel and id.
 *
 * mem_ctx: talloc parent of the new connection.
 *
 * NOTE(review): the error-path statements and the final return are not
 * visible in this listing.
 */
212 struct smb_direct_connection *smb_direct_connection_create(TALLOC_CTX *mem_ctx)
214 struct smb_direct_connection *c;
219 c = talloc_zero(mem_ctx, struct smb_direct_connection);
/*
 * NOTE(review): the destructor treats -1 as the marker for a closed fd,
 * while talloc_zero() leaves the fds at 0. No initialisation to -1 is
 * visible before the destructor is installed - confirm.
 */
225 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
226 talloc_set_destructor(c, smb_direct_connection_destructor);
/* defaults; max_send_size repeats the value of SMB_DIRECT_IO_MAX_DATA as a literal */
228 c->state.max_send_size = 1364;
229 c->state.max_receive_size = SMB_DIRECT_IO_MAX_DATA;
230 c->state.max_fragmented_size = 1048576;
231 c->state.max_read_write_size = 0;
232 c->state.receive_credit_max = 10;//255;
233 c->state.send_credit_target = 255;
234 c->state.keep_alive_internal = 5;
/* local stream socketpair; sfd[0] becomes the end handed to the caller */
236 ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sfd);
238 int saved_errno = errno;
240 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
244 c->sock.tmp_fd = sfd[0];
247 DEBUG(0,("%s:%s: sock.fd[%d] sock.tmp_fd[%d]\n",
248 __location__, __func__, c->sock.fd, c->sock.tmp_fd));
/* both ends are close-on-exec and non-blocking */
250 smb_set_close_on_exec(c->sock.tmp_fd);
251 smb_set_close_on_exec(c->sock.fd);
252 set_blocking(c->sock.fd, false);
253 set_blocking(c->sock.tmp_fd, false);
/* the event channel fd is later watched through tevent, so make it non-blocking */
255 c->rdma.cm_channel = rdma_create_event_channel();
256 if (c->rdma.cm_channel == NULL) {
258 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
261 smb_set_close_on_exec(c->rdma.cm_channel->fd);
262 set_blocking(c->rdma.cm_channel->fd, false);
/* two call variants selected by the ABI version; their arguments are not visible here */
264 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
265 ret = rdma_create_id(c->rdma.cm_channel,
270 ret = rdma_create_id(c->rdma.cm_channel,
276 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
280 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Create the verbs objects of the connection (protection domain,
 * completion channel, send and receive completion queues, queue pair)
 * and preallocate the smb_direct_io buffers for both directions.
 *
 * Called from the connect handler once the route is resolved, so
 * c->rdma.cm_id->verbs is used as device context.
 *
 * Every visible failure path returns NT_STATUS_NO_MEMORY without
 * releasing what was created before; the connection destructor
 * releases these objects.
 *
 * NOTE(review): the success return is not visible in this listing.
 */
284 NTSTATUS smb_direct_connection_complete_alloc(struct smb_direct_connection *c)
289 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
290 c->ibv.pd = ibv_alloc_pd(c->rdma.cm_id->verbs);
291 if (c->ibv.pd == NULL) {
292 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
293 return NT_STATUS_NO_MEMORY;
/* one completion channel serves both completion queues */
296 c->ibv.comp_channel = ibv_create_comp_channel(c->rdma.cm_id->verbs);
297 if (c->ibv.comp_channel == NULL) {
298 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
299 return NT_STATUS_NO_MEMORY;
301 smb_set_close_on_exec(c->ibv.comp_channel->fd);
302 set_blocking(c->ibv.comp_channel->fd, false);
/* queue depths follow the credit settings; two sge per request (header + data) */
304 c->ibv.init_attr.cap.max_send_wr = c->state.send_credit_target; // more for RDMA READ/WRITE??
305 c->ibv.init_attr.cap.max_recv_wr = c->state.receive_credit_max; // more for RDMA READ/WRITE??
306 c->ibv.init_attr.cap.max_recv_sge = 2;
307 c->ibv.init_attr.cap.max_send_sge = 2;
308 c->ibv.init_attr.qp_type = IBV_QPT_RC;
309 c->ibv.init_attr.sq_sig_all = 1;
/* the connection pointer is the cq context; the completion handler compares it */
311 c->ibv.send_cq = ibv_create_cq(c->rdma.cm_id->verbs,
312 c->ibv.init_attr.cap.max_send_wr,
313 c, c->ibv.comp_channel, 0);
314 if (c->ibv.send_cq == NULL) {
315 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
316 return NT_STATUS_NO_MEMORY;
318 c->ibv.init_attr.send_cq = c->ibv.send_cq;
320 c->ibv.recv_cq = ibv_create_cq(c->rdma.cm_id->verbs,
321 c->ibv.init_attr.cap.max_recv_wr,
322 c, c->ibv.comp_channel, 0);
323 if (c->ibv.recv_cq == NULL) {
324 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
325 return NT_STATUS_NO_MEMORY;
327 c->ibv.init_attr.recv_cq = c->ibv.recv_cq;
/* request notification for all completions (second argument 0) on both queues */
329 ret = ibv_req_notify_cq(c->ibv.send_cq, 0);
331 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
334 ret = ibv_req_notify_cq(c->ibv.recv_cq, 0);
336 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
337 return NT_STATUS_NO_MEMORY;
/* the queue pair is created through the connection manager id and cached in c->ibv.qp */
340 ret = rdma_create_qp(c->rdma.cm_id, c->ibv.pd, &c->ibv.init_attr);
342 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
343 return NT_STATUS_NO_MEMORY;
345 c->ibv.qp = c->rdma.cm_id->qp;
/* from here on smb_direct_io_create() can succeed */
347 c->io_mem_ctx = talloc_named_const(c, 0, "io_mem_ctx");
348 if (c->io_mem_ctx == NULL) {
349 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
350 return NT_STATUS_NO_MEMORY;
/* one receive buffer per receive credit, parked on the r2s idle list */
353 for (i = 0; i < c->state.receive_credit_max; i++) {
354 struct smb_direct_io *io;
356 io = smb_direct_io_create(c);
358 DEBUG(0,("%s:%s: SETUP r2s here...\n", __location__, __func__));
359 return NT_STATUS_NO_MEMORY;
361 DLIST_ADD_END(c->r2s.idle, io);
362 //DEBUG(0,("%s:%s: SETUP r2s here...\n", __location__, __func__));
/* one send buffer per targeted send credit, parked on the s2r idle list */
365 for (i = 0; i < c->state.send_credit_target; i++) {
366 struct smb_direct_io *io;
368 io = smb_direct_io_create(c);
370 DEBUG(0,("%s:%s: SETUP s2r here...\n", __location__, __func__));
371 return NT_STATUS_NO_MEMORY;
373 DLIST_ADD_END(c->s2r.idle, io);
374 //DEBUG(0,("%s:%s: SETUP s2r here...\n", __location__, __func__));
377 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Log the credit counters and size limits of the connection at debug
 * level 0.
 *
 * c:        connection whose c->state values are printed.
 * location: caller location string.
 * func:     caller function name.
 *
 * NOTE(review): the first two arguments of the format are not visible
 * in this listing; presumably they are location and func.
 */
381 static void smb_direct_connection_debug_credits(struct smb_direct_connection *c,
382 const char *location, const char *func)
384 DEBUG(0,("%s:%s: CREDITS: RMAX[%u] RTARGET[%u] R[%u] RSIZE[%u] "
385 "STARGET[%u] S[%u] SSIZE[%u] MF[%u] MRW[%u]\n",
387 c->state.receive_credit_max,
388 c->state.receive_credit_target,
389 c->state.receive_credits,
390 c->state.max_receive_size,
391 c->state.send_credit_target,
392 c->state.send_credits,
393 c->state.max_send_size,
394 c->state.max_fragmented_size,
395 c->state.max_read_write_size));
/*
 * talloc destructor of the connection: remove the fd events, close the
 * socketpair ends and release the RDMA and verbs objects.
 *
 * NOTE(review): several statements (closing c->sock.fd, resetting some
 * pointers, the return value) are not visible in this listing.
 */
398 static int smb_direct_connection_destructor(struct smb_direct_connection *c)
400 TALLOC_FREE(c->sock.fde);
/* -1 marks an fd that is already closed */
402 if (c->sock.fd != -1) {
407 if (c->sock.tmp_fd != -1) {
408 close(c->sock.tmp_fd);
412 TALLOC_FREE(c->ibv.fde_channel);
413 TALLOC_FREE(c->rdma.fde_channel);
/*
 * Freeing io_mem_ctx runs smb_direct_io_destructor() for every io, so
 * the memory regions are deregistered before the protection domain is
 * deallocated below.
 */
415 TALLOC_FREE(c->io_mem_ctx);
/* an event that was fetched but not yet acked */
419 if (c->rdma.cm_event != NULL) {
420 rdma_ack_cm_event(c->rdma.cm_event);
421 c->rdma.cm_event = NULL;
/*
 * NOTE(review): the queue pair was created with rdma_create_qp() in
 * smb_direct_connection_complete_alloc(); the librdmacm documentation
 * pairs that call with rdma_destroy_qp() - confirm ibv_destroy_qp() is
 * intended here.
 */
424 if (c->ibv.qp != NULL) {
425 ibv_destroy_qp(c->ibv.qp);
429 if (c->ibv.send_cq != NULL) {
430 ibv_destroy_cq(c->ibv.send_cq);
431 c->ibv.send_cq = NULL;
434 if (c->ibv.recv_cq != NULL) {
435 ibv_destroy_cq(c->ibv.recv_cq);
436 c->ibv.recv_cq = NULL;
/* the completion channel is destroyed after both queues that use it */
439 if (c->ibv.comp_channel != NULL) {
440 ibv_destroy_comp_channel(c->ibv.comp_channel);
441 c->ibv.comp_channel = NULL;
444 if (c->ibv.pd != NULL) {
445 ibv_dealloc_pd(c->ibv.pd);
/* the id is destroyed before the event channel it was created on */
449 if (c->rdma.cm_id != NULL) {
450 rdma_destroy_id(c->rdma.cm_id);
451 c->rdma.cm_id = NULL;
454 if (c->rdma.cm_channel != NULL) {
455 rdma_destroy_event_channel(c->rdma.cm_channel);
456 c->rdma.cm_channel = NULL;
/*
 * Post every idle receive buffer of the r2s direction to the queue pair
 * and move them from the idle list to the posted list.
 *
 * NOTE(review): the return statements are not visible in this listing.
 */
462 static int smb_direct_connection_post_recv(struct smb_direct_connection *c)
464 struct smb_direct_io *io = NULL;
465 struct ibv_recv_wr *bad_recv_wr = NULL;
468 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
469 smb_direct_connection_debug_credits(c, __location__, __func__);
/* nothing to post */
470 if (c->r2s.idle == NULL) {
471 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* chain the receive requests in list order; the last one terminates the chain */
475 for (io = c->r2s.idle; io != NULL; io = io->next) {
476 if (io->next == NULL) {
477 io->recv_wr.next = NULL;
481 DEBUG(0,("%s:%s: POST RECV[%p]\n", __location__, __func__, io));
482 io->recv_wr.next = &io->next->recv_wr;
/*
 * The whole chain is posted with one call.
 * NOTE(review): ibv_post_recv() is documented to return the error
 * number instead of setting errno, so the errno based mapping below may
 * report a stale value - confirm.
 */
486 ret = ibv_post_recv(c->ibv.qp, &c->r2s.idle->recv_wr, &bad_recv_wr);
489 status = map_nt_error_from_unix_common(errno);
490 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
491 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
495 DLIST_CONCATENATE(c->r2s.posted, c->r2s.idle);
498 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Send the buffers queued on c->s2r.ready as long as send credits are
 * available. For each buffer a send credit is consumed, receive credits
 * are granted to the peer, the header is written into io->smbd_hdr and
 * the buffer is moved to a local posted list, which is appended to
 * c->s2r.posted at the end.
 *
 * NOTE(review): braces, some declarations (for example flags and ret),
 * the assignment of next and the return statements are not visible in
 * this listing, so the exact nesting of the statements below cannot be
 * confirmed from here.
 */
502 static int smb_direct_connection_post_send(struct smb_direct_connection *c)
504 struct smb_direct_io *io = NULL;
505 struct smb_direct_io *next = NULL;
506 struct smb_direct_io *posted = NULL;
507 struct smb_direct_io *last = NULL;
508 struct ibv_send_wr *bad_send_wr = NULL;
511 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
512 smb_direct_connection_debug_credits(c, __location__, __func__);
/* nothing queued */
513 if (c->s2r.ready == NULL) {
514 DEBUG(0,("%s:%s: none READY\n", __location__, __func__));
/* the peer has not granted any credit */
518 if (c->state.send_credits == 0) {
519 DEBUG(0,("%s:%s: no credits\n", __location__, __func__));
525 DEBUG(0,("%s:%s: TODO: REMOVE me ...here...\n", __location__, __func__));
530 for (io = c->s2r.ready; io != NULL; io = next) {
531 uint16_t granted = 0;
533 uint32_t data_offset = 0;
537 if (c->state.send_credits == 0) {
538 //smb_direct_connection_debug_credits(c, __location__, __func__);
542 c->state.send_credits -= 1;
/* the last available credit is spent: ask the peer for a response */
544 if (c->state.send_credits == 0) {
545 flags |= SMB_DIRECT_RESPONSE_REQUESTED;
/* grant the unused part of receive_credit_max, capped by receive_credit_target */
548 granted = c->state.receive_credit_max;
549 granted -= c->state.receive_credits;
550 granted = MIN(granted, c->state.receive_credit_target);
551 c->state.receive_credits += granted;
552 //smb_direct_connection_debug_credits(c, __location__, __func__);
554 // if (c->state.receive_credits == 0) {
556 // c->state.receive_credits += granted;
/* with payload: header padded to SMB_DIRECT_DATA_OFFSET plus the data sge */
559 if (io->data_length > 0) {
560 data_offset = SMB_DIRECT_DATA_OFFSET;
561 io->sge[0].length = data_offset;
562 io->sge[1].length = io->data_length;
563 io->send_wr.num_sge = 2;
/* without payload: only the minimal header is sent */
565 io->sge[0].length = SMB_DIRECT_DATA_MIN_HDR_SIZE;
566 io->send_wr.num_sge = 1;
/*
 * Header fields as written here: 0x00 send_credit_target, 0x02 granted
 * credits, 0x04 flags, 0x06 zero, 0x08 remaining length, 0x0C data
 * offset, 0x10 data length, 0x14 zero.
 */
569 SSVAL(io->smbd_hdr, 0x00, c->state.send_credit_target);
570 SSVAL(io->smbd_hdr, 0x02, granted);
571 SSVAL(io->smbd_hdr, 0x04, flags);
572 SSVAL(io->smbd_hdr, 0x06, 0x0000);
573 SIVAL(io->smbd_hdr, 0x08, io->remaining_length);
574 SIVAL(io->smbd_hdr, 0x0C, data_offset);
575 SIVAL(io->smbd_hdr, 0x10, io->data_length);
576 SIVAL(io->smbd_hdr, 0x14, 0x00000000);
579 io->send_wr.next = &next->send_wr;
581 io->send_wr.next = NULL;
/* whatever was chosen above, the request is unchained again before it is posted */
583 io->send_wr.next = NULL;
584 DLIST_REMOVE(c->s2r.ready, io);
585 DLIST_ADD_END(posted, io);
586 DEBUG(0,("%s:%s: POST SEND[%p] data_length[%u] remaining_length[%u]\n",
587 __location__, __FUNCTION__, io,
588 (unsigned)io->data_length, (unsigned)io->remaining_length));
/*
 * The request appears to be posted on its own, one call per io.
 * NOTE(review): ibv_post_send() is documented to return the error
 * number instead of setting errno, so the errno based mapping below may
 * report a stale value - confirm.
 */
590 ret = ibv_post_send(c->ibv.qp, &io->send_wr, &bad_send_wr);
593 // DLIST_CONCATENATE(c->s2r.ready, posted); // TODO: check bad_send_wr
594 status = map_nt_error_from_unix_common(errno);
595 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
596 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
601 last = DLIST_TAIL(posted);
602 last->send_wr.next = NULL;
/* disabled variant that posted the whole chain with a single call */
605 //ret = ibv_post_send(c->ibv.qp, &posted->send_wr, &bad_send_wr);
608 //// DLIST_CONCATENATE(c->s2r.ready, posted); // TODO: check bad_send_wr
609 // status = map_nt_error_from_unix_common(errno);
610 // DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
611 // __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
615 DLIST_CONCATENATE(c->s2r.posted, posted);
/*
 * Queue one buffer from the s2r idle list with remaining_length 0 and
 * hand it to smb_direct_connection_post_send(). The function does
 * nothing of this when buffers are already posted or ready, or when no
 * idle buffer exists.
 *
 * NOTE(review): the statement that picks io from the idle list, any
 * other field it sets and the return statements are not visible in this
 * listing.
 */
620 static int smb_direct_connection_post_keep(struct smb_direct_connection *c)
622 struct smb_direct_io *io = NULL;
625 DEBUG(0,("%s:%s: KEEP posted[%p] ready[%p] idle[%p]\n",
626 __location__, __func__, c->s2r.posted, c->s2r.ready, c->s2r.idle));
/* sends are still in flight */
627 if (c->s2r.posted != NULL) {
628 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* sends are already queued */
632 if (c->s2r.ready != NULL) {
633 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* no free buffer */
637 if (c->s2r.idle == NULL) {
638 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
642 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
644 DLIST_REMOVE(c->s2r.idle, io);
647 io->remaining_length = 0;
649 DLIST_ADD_END(c->s2r.ready, io);
651 ret = smb_direct_connection_post_send(c);
654 status = map_nt_error_from_unix_common(errno);
655 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
656 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
659 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Apply TEVENT_FD_READABLE to the fd event of the local socket
 * (c->sock.fde) and log that at debug level 0.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
663 static int smb_direct_connection_setup_readv(struct smb_direct_connection *c)
665 DEBUG(0,("%s:%s: TEVENT_FD_READABLE on\n", __location__, __func__));
666 TEVENT_FD_READABLE(c->sock.fde);
667 // TODO: immediate_event?? may skips a syscall.
/*
 * Apply TEVENT_FD_WRITEABLE to the fd event of the local socket
 * (c->sock.fde) and log that at debug level 0.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
671 static int smb_direct_connection_setup_writev(struct smb_direct_connection *c)
673 DEBUG(0,("%s:%s: TEVENT_FD_WRITEABLE on\n", __location__, __func__));
674 TEVENT_FD_WRITEABLE(c->sock.fde);
675 // TODO: immediate_event?? may skips a syscall.
/* Request state of smb_direct_connection_rdma_connect_send(). */
679 struct smb_direct_connection_rdma_connect_state {
/* connection being connected; read by the state destructor and the event handler */
680 struct smb_direct_connection *c;
/*
 * talloc destructor of the connect request state: remove the fd event
 * on the connection manager channel, whose private pointer is not
 * visible here but whose handler resolves it to this request.
 *
 * NOTE(review): the return value is not visible in this listing.
 */
683 static int smb_direct_connection_rdma_connect_state_destructor(
684 struct smb_direct_connection_rdma_connect_state *state)
686 struct smb_direct_connection *c = state->c;
688 TALLOC_FREE(c->rdma.fde_channel);
/* fd handler for the connection manager channel, defined below */
693 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
694 struct tevent_fd *fde,
/*
 * Start the asynchronous RDMA connect: watch the connection manager
 * event channel and start address resolution. The remaining steps are
 * driven by smb_direct_connection_rdma_connect_handler().
 *
 * mem_ctx:     talloc parent of the request.
 * ev:          tevent context the fd event is added to.
 * c:           connection created by smb_direct_connection_create().
 * src:         not used by the visible statements (src_addr is NULL).
 * dst:         destination address; a local copy gets the port set.
 * local_addr:  not used by the visible statements.
 * remote_addr: not used by the visible statements.
 *
 * NOTE(review): the assignment of state->c, the arguments of
 * rdma_resolve_addr() and the final return are not visible in this
 * listing.
 */
698 static struct tevent_req *smb_direct_connection_rdma_connect_send(TALLOC_CTX *mem_ctx,
699 struct tevent_context *ev,
700 struct smb_direct_connection *c,
701 const struct sockaddr_storage *src,
702 const struct sockaddr_storage *dst,
703 struct tsocket_address *local_addr,
704 struct tsocket_address *remote_addr)
706 struct tevent_req *req;
707 struct smb_direct_connection_rdma_connect_state *state;
709 //struct sockaddr *src_addr = (const struct sockaddr *)src;
710 struct sockaddr *src_addr = NULL;
/* work on a copy so the address of the caller is left untouched */
711 struct sockaddr_storage _dst_addr = *dst;
712 struct sockaddr *dst_addr = (struct sockaddr *)&_dst_addr;
/* the destination port is always forced to 5445 */
714 set_sockaddr_port(dst_addr, 5445);
716 req = tevent_req_create(mem_ctx, &state,
717 struct smb_direct_connection_rdma_connect_state);
723 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
724 talloc_set_destructor(state, smb_direct_connection_rdma_connect_state_destructor);
/* the fd event is allocated on c, the state destructor frees it */
726 c->rdma.fde_channel = tevent_add_fd(ev, c,
727 c->rdma.cm_channel->fd,
729 smb_direct_connection_rdma_connect_handler,
731 if (tevent_req_nomem(c->rdma.fde_channel, req)) {
732 return tevent_req_post(req, ev);
736 ret = rdma_resolve_addr(c->rdma.cm_id,
740 NTSTATUS status = map_nt_error_from_unix_common(errno);
741 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
742 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
743 tevent_req_nterror(req, status);
744 return tevent_req_post(req, ev);
/* first event the handler will accept */
746 c->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
/*
 * fd handler for the connection manager event channel during connect.
 * Each call fetches one event into c->rdma.cm_event and advances the
 * sequence ADDR_RESOLVED, ROUTE_RESOLVED, ESTABLISHED; the request is
 * finished with tevent_req_done() on ESTABLISHED and with an error for
 * a failed or listed unexpected event.
 *
 * NOTE(review): braces, break and return statements are not visible in
 * this listing, so which paths reach the final rdma_ack_cm_event()
 * cannot be confirmed from here.
 */
751 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
752 struct tevent_fd *fde,
756 struct tevent_req *req =
757 talloc_get_type_abort(private_data,
759 struct smb_direct_connection_rdma_connect_state *state =
761 struct smb_direct_connection_rdma_connect_state);
762 struct smb_direct_connection *c = state->c;
763 struct rdma_conn_param conn_param;
764 uint8_t ird_ord_hdr[8];
765 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
770 ret = rdma_get_cm_event(c->rdma.cm_channel,
773 status = map_nt_error_from_unix_common(errno);
774 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
775 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
776 tevent_req_nterror(req, status);
/* the event status is treated as an errno value */
781 if (c->rdma.cm_event->status != 0) {
782 errno = c->rdma.cm_event->status;
783 status = map_nt_error_from_unix_common(errno);
784 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
785 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
786 tevent_req_nterror(req, status);
/* only the event announced by the previous step is accepted */
790 if (c->rdma.cm_event->event != c->rdma.expected_event) {
791 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
792 __location__, __FUNCTION__, ret, errno));
796 switch (c->rdma.cm_event->event) {
797 case RDMA_CM_EVENT_ADDR_RESOLVED:
/* second argument 5000 is the timeout passed to rdma_resolve_route() */
799 ret = rdma_resolve_route(c->rdma.cm_id, 5000);
801 status = map_nt_error_from_unix_common(errno);
802 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
803 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
804 tevent_req_nterror(req, status);
807 c->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
809 case RDMA_CM_EVENT_ROUTE_RESOLVED:
/* the verbs objects and the queue pair must exist before rdma_connect() */
812 status = smb_direct_connection_complete_alloc(c);
813 if (!NT_STATUS_IS_OK(status)) {
814 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
815 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
816 tevent_req_nterror(req, status);
/*
 * 8 bytes of connect private data: the values 16 and 0 stored with
 * RSIVAL. Judging by the buffer name these are IRD/ORD values -
 * presumably, confirm.
 */
820 RSIVAL(ird_ord_hdr, 0, 16);
821 RSIVAL(ird_ord_hdr, 4, 0);
823 ZERO_STRUCT(conn_param);
824 conn_param.private_data = ird_ord_hdr;
825 conn_param.private_data_len = sizeof(ird_ord_hdr);
826 conn_param.responder_resources = 1;
827 conn_param.initiator_depth = 1;
828 conn_param.retry_count = 10;
831 ret = rdma_connect(c->rdma.cm_id, &conn_param);
833 status = map_nt_error_from_unix_common(errno);
834 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
835 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
836 tevent_req_nterror(req, status);
839 c->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
842 case RDMA_CM_EVENT_ESTABLISHED:
845 //DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
846 // __location__, __FUNCTION__, ret, errno));
/* connected: stop watching the channel, ack the event and finish the request */
848 c->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED;
849 TALLOC_FREE(c->rdma.fde_channel);
850 rdma_ack_cm_event(c->rdma.cm_event);
851 c->rdma.cm_event = NULL;
852 tevent_req_done(req);
/* every other known event fails the request */
855 case RDMA_CM_EVENT_ADDR_ERROR:
856 case RDMA_CM_EVENT_ROUTE_ERROR:
857 case RDMA_CM_EVENT_CONNECT_REQUEST:
858 case RDMA_CM_EVENT_CONNECT_RESPONSE:
859 case RDMA_CM_EVENT_CONNECT_ERROR:
860 case RDMA_CM_EVENT_UNREACHABLE:
861 case RDMA_CM_EVENT_REJECTED:
862 case RDMA_CM_EVENT_DISCONNECTED:
863 case RDMA_CM_EVENT_DEVICE_REMOVAL:
864 case RDMA_CM_EVENT_MULTICAST_JOIN:
865 case RDMA_CM_EVENT_MULTICAST_ERROR:
866 case RDMA_CM_EVENT_ADDR_CHANGE:
867 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
868 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
869 DEBUG(0,("%s:%s: event[%d] ret[%d] errno[%d] status[%s]\n",
870 __location__, __FUNCTION__,
871 c->rdma.cm_event->event, ret, errno, nt_errstr(status)));
872 tevent_req_nterror(req, status);
/* ack the handled event so the next one can be fetched */
876 rdma_ack_cm_event(c->rdma.cm_event);
877 c->rdma.cm_event = NULL;
/*
 * Collect the result of smb_direct_connection_rdma_connect_send().
 * Logs at debug level 0 and returns the NTSTATUS stored in the request
 * via tevent_req_simple_recv_ntstatus().
 */
880 static NTSTATUS smb_direct_connection_rdma_connect_recv(struct tevent_req *req)
882 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
883 return tevent_req_simple_recv_ntstatus(req);
/*
 * Request state of smb_direct_connection_negotiate_connect_send().
 *
 * NOTE(review): the delimiters and names of the nested member groups
 * are not present in this listing. The visible members form three
 * groups, each with one scatter/gather entry and one work request (two
 * send, one receive); two of the groups also carry a byte buffer. The
 * visible part of smb_direct_connection_negotiate_connect_send() builds
 * its work requests from smb_direct_io objects and does not reference
 * these members - confirm whether they are still needed.
 */
886 struct smb_direct_connection_negotiate_connect_state {
/* connection being negotiated; read by the destructor and both handlers */
887 struct smb_direct_connection *c;
890 struct ibv_sge sge[1];
891 struct ibv_send_wr wr;
894 uint8_t buffer[0x14];
896 struct ibv_sge sge[1];
897 struct ibv_send_wr wr;
900 uint8_t buffer[512];//0x20];
902 struct ibv_sge sge[1];
903 struct ibv_recv_wr wr;
/*
 * talloc destructor of the negotiate request state: remove the fd
 * events on the completion channel and on the connection manager
 * channel, both of which were added by the negotiate request.
 *
 * NOTE(review): the return value is not visible in this listing.
 */
908 static int smb_direct_connection_negotiate_connect_destructor(
909 struct smb_direct_connection_negotiate_connect_state *state)
911 struct smb_direct_connection *c = state->c;
913 TALLOC_FREE(c->ibv.fde_channel);
914 TALLOC_FREE(c->rdma.fde_channel);
/* disabled cleanup of a memory region that was held in the state */
916 // if (state->req.mr != NULL) {
917 // ibv_dereg_mr(state->req.mr);
918 // state->req.mr = NULL;
/* fd handler for connection manager events during negotiation, defined below */
924 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
925 struct tevent_fd *fde,
/* fd handler for completion channel events during negotiation, defined below */
928 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
929 struct tevent_fd *fde,
/*
 * Start the SMB-Direct negotiation on an established RDMA connection:
 * watch both event channels, post one receive buffer for the response
 * and post a chain of two send requests (a zero length RDMA read
 * followed by the negotiate request).
 *
 * mem_ctx: talloc parent of the request.
 * ev:      tevent context the fd events are added to.
 * c:       connected smb_direct_connection.
 *
 * NOTE(review): the assignment of state->c, some declarations and the
 * final return are not visible in this listing.
 */
933 static struct tevent_req *smb_direct_connection_negotiate_connect_send(TALLOC_CTX *mem_ctx,
934 struct tevent_context *ev,
935 struct smb_direct_connection *c)
937 struct tevent_req *req;
938 struct smb_direct_connection_negotiate_connect_state *state;
939 struct smb_direct_io *rdma_read = NULL;
940 struct smb_direct_io *neg_send = NULL;
941 struct smb_direct_io *neg_recv = NULL;
942 struct ibv_recv_wr *bad_recv_wr = NULL;
943 struct ibv_send_wr *bad_send_wr = NULL;
947 req = tevent_req_create(mem_ctx, &state,
948 struct smb_direct_connection_negotiate_connect_state);
954 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* the destructor removes both fd events added below */
956 talloc_set_destructor(state, smb_direct_connection_negotiate_connect_destructor);
958 c->rdma.fde_channel = tevent_add_fd(ev, c, c->rdma.cm_channel->fd,
960 smb_direct_connection_negotiate_connect_rdma_handler,
962 if (tevent_req_nomem(c->rdma.fde_channel, req)) {
963 return tevent_req_post(req, ev);
965 c->ibv.fde_channel = tevent_add_fd(ev, c, c->ibv.comp_channel->fd,
967 smb_direct_connection_negotiate_connect_ibv_handler,
969 if (tevent_req_nomem(c->ibv.fde_channel, req)) {
970 return tevent_req_post(req, ev);
/*
 * Receive buffer for the response: the default two entry layout from
 * smb_direct_io_create() is replaced by a single entry covering the
 * whole data buffer.
 */
973 neg_recv = smb_direct_io_create(c);
974 if (tevent_req_nomem(neg_recv, req)) {
975 return tevent_req_post(req, ev);
977 neg_recv->sge[0].addr = (uint64_t) (uintptr_t) neg_recv->data;
978 neg_recv->sge[0].length = sizeof(neg_recv->data);
979 neg_recv->sge[0].lkey = neg_recv->data_mr->lkey;
980 neg_recv->recv_wr.sg_list = neg_recv->sge;
981 neg_recv->recv_wr.num_sge = 1;
/*
 * Zero length RDMA read with placeholder address and keys (all 1).
 * The purpose is not stated in the visible source.
 */
983 rdma_read = smb_direct_io_create(c);
984 if (tevent_req_nomem(rdma_read, req)) {
985 return tevent_req_post(req, ev);
987 rdma_read->sge[0].addr = 1;
988 rdma_read->sge[0].length = 0;
989 rdma_read->sge[0].lkey = 1;
990 rdma_read->send_wr.opcode = IBV_WR_RDMA_READ;
991 rdma_read->send_wr.send_flags = IBV_SEND_SIGNALED;
992 rdma_read->send_wr.sg_list = rdma_read->sge;
993 rdma_read->send_wr.num_sge = 1;
994 rdma_read->send_wr.wr.rdma.rkey = 1;
995 rdma_read->send_wr.wr.rdma.remote_addr = 1;
/*
 * Negotiate request, 0x14 bytes built in the data buffer: 0x0100 at
 * 0x00 and 0x02, zero at 0x04, then send_credit_target, max_send_size,
 * max_receive_size and max_fragmented_size.
 */
997 neg_send = smb_direct_io_create(c);
998 if (tevent_req_nomem(neg_send, req)) {
999 return tevent_req_post(req, ev);
1001 SSVAL(neg_send->data, 0x00, 0x0100);
1002 SSVAL(neg_send->data, 0x02, 0x0100);
1003 SSVAL(neg_send->data, 0x04, 0x0000);
1004 SSVAL(neg_send->data, 0x06, c->state.send_credit_target);
1005 SIVAL(neg_send->data, 0x08, c->state.max_send_size);
1006 SIVAL(neg_send->data, 0x0C, c->state.max_receive_size);
1007 SIVAL(neg_send->data, 0x10, c->state.max_fragmented_size);
1008 neg_send->sge[0].addr = (uint64_t) (uintptr_t) neg_send->data;
1009 neg_send->sge[0].length = 0x14;
1010 neg_send->sge[0].lkey = neg_send->data_mr->lkey;
1011 neg_send->send_wr.opcode = IBV_WR_SEND;
1012 neg_send->send_wr.send_flags = IBV_SEND_SIGNALED;
1013 neg_send->send_wr.sg_list = neg_send->sge;
1014 neg_send->send_wr.num_sge = 1;
/*
 * The receive is posted before anything is sent.
 * NOTE(review): ibv_post_recv() and ibv_post_send() are documented to
 * return the error number instead of setting errno, so the errno based
 * mappings below may report a stale value - confirm.
 */
1017 ret = ibv_post_recv(c->ibv.qp, &neg_recv->recv_wr, &bad_recv_wr);
1019 status = map_nt_error_from_unix_common(errno);
1020 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1021 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1022 tevent_req_nterror(req, status);
1023 return tevent_req_post(req, ev);
/* one call posts the chain rdma_read followed by neg_send */
1026 rdma_read->send_wr.next = &neg_send->send_wr;
1028 ret = ibv_post_send(c->ibv.qp, &rdma_read->send_wr, &bad_send_wr);
1030 status = map_nt_error_from_unix_common(errno);
1031 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1032 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1033 tevent_req_nterror(req, status);
1034 return tevent_req_post(req, ev);
1037 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1038 __location__, __FUNCTION__, ret, errno));
/*
 * fd handler for the connection manager event channel while the
 * negotiation is running. Every visible path fails the request: a
 * DISCONNECTED event with NT_STATUS_CONNECTION_DISCONNECTED, the other
 * listed events with NT_STATUS_INVALID_NETWORK_RESPONSE, and the
 * trailing statements with NT_STATUS_INTERNAL_ERROR.
 *
 * NOTE(review): braces, break and return statements are not visible in
 * this listing. No rdma_ack_cm_event() call is visible in this handler;
 * the connection destructor acks a pending c->rdma.cm_event.
 */
1042 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
1043 struct tevent_fd *fde,
1047 struct tevent_req *req =
1048 talloc_get_type_abort(private_data,
1050 struct smb_direct_connection_negotiate_connect_state *state =
1051 tevent_req_data(req,
1052 struct smb_direct_connection_negotiate_connect_state);
1053 struct smb_direct_connection *c = state->c;
1054 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1059 ret = rdma_get_cm_event(c->rdma.cm_channel,
1062 status = map_nt_error_from_unix_common(errno);
1063 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1064 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1065 tevent_req_nterror(req, status);
/* the event status is treated as an errno value */
1069 if (c->rdma.cm_event->status != 0) {
1070 errno = c->rdma.cm_event->status;
1071 status = map_nt_error_from_unix_common(errno);
1072 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1073 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1074 tevent_req_nterror(req, status);
1078 switch (c->rdma.cm_event->event) {
/* the peer closed the connection during the negotiation */
1079 case RDMA_CM_EVENT_DISCONNECTED:
1080 status = NT_STATUS_CONNECTION_DISCONNECTED;
1081 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1082 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1083 tevent_req_nterror(req, status);
/* no other connection manager event is expected in this phase */
1085 case RDMA_CM_EVENT_ADDR_RESOLVED:
1086 case RDMA_CM_EVENT_ADDR_ERROR:
1087 case RDMA_CM_EVENT_ROUTE_RESOLVED:
1088 case RDMA_CM_EVENT_ESTABLISHED:
1089 case RDMA_CM_EVENT_ROUTE_ERROR:
1090 case RDMA_CM_EVENT_CONNECT_REQUEST:
1091 case RDMA_CM_EVENT_CONNECT_RESPONSE:
1092 case RDMA_CM_EVENT_CONNECT_ERROR:
1093 case RDMA_CM_EVENT_UNREACHABLE:
1094 case RDMA_CM_EVENT_REJECTED:
1095 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1096 case RDMA_CM_EVENT_MULTICAST_JOIN:
1097 case RDMA_CM_EVENT_MULTICAST_ERROR:
1098 case RDMA_CM_EVENT_ADDR_CHANGE:
1099 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1100 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1101 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1102 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1103 tevent_req_nterror(req, status);
/* presumably reached only for an event value not listed above - confirm */
1107 status = NT_STATUS_INTERNAL_ERROR;
1108 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1109 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1110 tevent_req_nterror(req, status);
1113 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
1114 struct tevent_fd *fde,
1118 struct tevent_req *req =
1119 talloc_get_type_abort(private_data,
1121 struct smb_direct_connection_negotiate_connect_state *state =
1122 tevent_req_data(req,
1123 struct smb_direct_connection_negotiate_connect_state);
1124 struct smb_direct_connection *c = state->c;
1125 struct ibv_cq *cq = NULL;
1126 void *cq_context = NULL;
1127 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1130 uint16_t credits_requested;
1131 uint16_t credits_granted;
1132 uint32_t max_read_write_size;
1133 uint32_t preferred_send_size;
1134 uint32_t max_receive_size;
1135 uint32_t max_fragmented_size;
1137 struct smb_direct_io *io = NULL;
1140 ret = ibv_get_cq_event(c->ibv.comp_channel,
1143 status = map_nt_error_from_unix_common(errno);
1144 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1145 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1146 tevent_req_nterror(req, status);
1150 ibv_ack_cq_events(cq, 1);
1152 if (cq_context != c) {
1153 status = NT_STATUS_INTERNAL_ERROR;
1154 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1155 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1156 tevent_req_nterror(req, status);
1161 ret = ibv_req_notify_cq(cq, 0);
1163 status = map_nt_error_from_unix_common(errno);
1164 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1165 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1166 tevent_req_nterror(req, status);
1172 ret = ibv_poll_cq(cq, 1, &wc);
1174 status = map_nt_error_from_unix_common(errno);
1175 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1176 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1177 tevent_req_nterror(req, status);
1182 if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1183 //errno = wc.status;
1184 status = map_nt_error_from_unix_common(wc.status);//errno);
1185 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1186 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1187 TALLOC_FREE(c->ibv.fde_channel);
1188 TALLOC_FREE(c->rdma.fde_channel);
1189 smb_direct_connection_negotiate_connect_rdma_handler(ev, fde, flags, private_data);
1192 if (wc.status != IBV_WC_SUCCESS) {
1194 status = map_nt_error_from_unix_common(errno);
1195 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1196 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1197 ibv_wc_status_str(wc.status)));
1198 tevent_req_nterror(req, status);
1202 io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1203 struct smb_direct_io);
1205 switch (wc.opcode) {
1207 DEBUG(0,("%s:%s: GOT SEND[%p] next[%p] ret[%d] errno[%d]\n",
1208 __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1211 case IBV_WC_RDMA_READ:
1212 DEBUG(0,("%s:%s: GOT RDMA_READ[%p] next[%p] ret[%d] errno[%d]\n",
1213 __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1217 DEBUG(0,("%s:%s: GOT RECV[%p] next[%p] ret[%d] errno[%d]\n",
1218 __location__, __FUNCTION__, io, io->recv_wr.next, ret, errno));
1219 //dump_data(0, io->data, wc.byte_len);
1220 if (wc.byte_len < 0x20) {
1221 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1222 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1223 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1224 tevent_req_nterror(req, status);
1227 if (SVAL(io->data, 0x00) != 0x0100) {
1228 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1229 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1230 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1231 tevent_req_nterror(req, status);
1234 if (SVAL(io->data, 0x02) != 0x0100) {
1235 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1236 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1237 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1238 tevent_req_nterror(req, status);
1241 if (SVAL(io->data, 0x04) != 0x0100) {
1242 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1243 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1244 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1245 tevent_req_nterror(req, status);
1248 credits_requested = SVAL(io->data, 0x08);
1249 if (credits_requested == 0) {
1250 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1251 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1252 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1253 tevent_req_nterror(req, status);
1256 credits_granted = SVAL(io->data, 0x0A);
1257 if (credits_granted == 0) {
1258 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1259 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1260 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1261 tevent_req_nterror(req, status);
1264 status = NT_STATUS(IVAL(io->data, 0x0C));
1265 if (!NT_STATUS_IS_OK(status)) {
1266 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1267 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1268 tevent_req_nterror(req, status);
1271 max_read_write_size = IVAL(io->data, 0x10);
1272 preferred_send_size = IVAL(io->data, 0x14);
1273 if (preferred_send_size > c->state.max_receive_size) {
1274 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1275 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1276 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1277 tevent_req_nterror(req, status);
1280 max_receive_size = IVAL(io->data, 0x18);
1281 if (max_receive_size < 0x80) {
1282 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1283 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1284 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1285 tevent_req_nterror(req, status);
1288 max_fragmented_size = IVAL(io->data, 0x1C);
1289 if (max_fragmented_size < 0x20000) {
1290 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1291 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1292 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1293 tevent_req_nterror(req, status);
1297 c->state.receive_credit_target = credits_requested;
1299 tmp = c->state.max_receive_size;
1300 tmp = MIN(tmp, preferred_send_size);
1301 tmp = MAX(tmp, 128);
1302 c->state.max_receive_size = tmp;
1304 tmp = c->state.max_send_size;
1305 tmp = MIN(tmp, max_receive_size);
1306 c->state.max_send_size = tmp;
1308 tmp = MIN(1048576, max_read_write_size);
1309 c->state.max_read_write_size = tmp;
1311 tmp = c->state.max_fragmented_size;
1312 tmp = MIN(tmp, max_fragmented_size);
1313 c->state.max_fragmented_size = tmp;
1315 c->state.send_credits = credits_granted;
1317 TALLOC_FREE(c->ibv.fde_channel);
1318 TALLOC_FREE(c->rdma.fde_channel);
1320 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1321 __location__, __FUNCTION__, ret, errno));
1326 ret = smb_direct_connection_post_recv(c);
1328 status = map_nt_error_from_unix_common(errno);
1329 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1330 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1331 tevent_req_nterror(req, status);
1335 tevent_req_done(req);
1338 case IBV_WC_RDMA_WRITE:
1340 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1341 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1342 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1343 tevent_req_nterror(req, status);
/*
 * Receive side of the negotiate-connect request: logs a trace line and
 * returns the NTSTATUS that tevent_req_simple_recv_ntstatus() reports
 * for req.
 */
1348 static NTSTATUS smb_direct_connection_negotiate_connect_recv(struct tevent_req *req)
1350 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1351 return tevent_req_simple_recv_ntstatus(req);
/*
 * Per-request state allocated by smb_direct_connection_connect_send()
 * and read back by smb_direct_connection_connect_done_rdma() and
 * smb_direct_connection_connect_recv().
 */
1354 struct smb_direct_connection_connect_state {
/* event context, reused when the negotiate sub-request is started */
1355 struct tevent_context *ev;
/* connection being set up; connect_recv takes sock.tmp_fd from it */
1356 struct smb_direct_connection *c;
/*
 * Completion callbacks of the two chained sub-requests: the RDMA connect
 * completes in ..._done_rdma, the negotiate completes in
 * ..._done_negotiate.
 */
1359 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq);
1360 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq);
/*
 * Start an asynchronous SMB-Direct connect on c.
 *
 * Creates a tevent request carrying a
 * struct smb_direct_connection_connect_state, launches the RDMA connect
 * sub-request for src/dst and registers
 * smb_direct_connection_connect_done_rdma() as its callback.
 *
 * mem_ctx: talloc parent passed to tevent_req_create()
 * ev:      event context used for the sub-request and tevent_req_post()
 * c:       connection to establish
 * src/dst: addresses handed to smb_direct_connection_rdma_connect_send()
 *
 * If the sub-request could not be allocated the request is finished via
 * tevent_req_post() and returned. The result is collected with
 * smb_direct_connection_connect_recv().
 */
1362 struct tevent_req *smb_direct_connection_connect_send(TALLOC_CTX *mem_ctx,
1363 struct tevent_context *ev,
1364 struct smb_direct_connection *c,
1365 const struct sockaddr_storage *src,
1366 const struct sockaddr_storage *dst)
1368 struct tevent_req *req = NULL;
1369 struct smb_direct_connection_connect_state *state = NULL;
1370 struct tevent_req *subreq = NULL;
1372 req = tevent_req_create(mem_ctx, &state,
1373 struct smb_direct_connection_connect_state);
1380 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* step 1: RDMA level connect; the sub-request is a talloc child of state */
1381 subreq = smb_direct_connection_rdma_connect_send(state, ev, c, src, dst, NULL, NULL);
1382 if (tevent_req_nomem(subreq, req)) {
1383 return tevent_req_post(req, ev);
1385 tevent_req_set_callback(subreq,
1386 smb_direct_connection_connect_done_rdma,
1389 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Callback of the RDMA connect sub-request.
 *
 * Collects the sub-request status and frees it. A failure status fails
 * the parent request through tevent_req_nterror(); otherwise the
 * negotiate sub-request is started on the same event context and
 * connection, with smb_direct_connection_connect_done_negotiate() as its
 * callback.
 */
1393 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq)
1395 struct tevent_req *req =
1396 tevent_req_callback_data(subreq,
1398 struct smb_direct_connection_connect_state *state =
1399 tevent_req_data(req,
1400 struct smb_direct_connection_connect_state);
1403 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1404 status = smb_direct_connection_rdma_connect_recv(subreq);
1405 TALLOC_FREE(subreq);
1406 if (tevent_req_nterror(req, status)) {
1410 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* step 2: SMB-Direct negotiate on the now connected transport */
1411 subreq = smb_direct_connection_negotiate_connect_send(state, state->ev, state->c);
1412 if (tevent_req_nomem(subreq, req)) {
1415 tevent_req_set_callback(subreq,
1416 smb_direct_connection_connect_done_negotiate,
1418 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/*
 * Callback of the negotiate sub-request.
 *
 * Collects the negotiate status and frees the sub-request. A failure
 * status fails the parent request through tevent_req_nterror();
 * otherwise the parent request is marked done.
 */
1421 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq)
1423 struct tevent_req *req =
1424 tevent_req_callback_data(subreq,
1428 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1429 status = smb_direct_connection_negotiate_connect_recv(subreq);
1430 TALLOC_FREE(subreq);
1431 if (tevent_req_nterror(req, status)) {
1435 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1436 tevent_req_done(req);
/*
 * Collect the result of smb_direct_connection_connect_send().
 *
 * req: the request returned by the send function
 * fd:  out parameter, set only on success
 *
 * If tevent_req_is_nterror() reports a failure, a trace line is logged
 * and the request is marked received. On success c->sock.tmp_fd is
 * stored in *fd, the connection's copy is reset to -1 and NT_STATUS_OK
 * is returned.
 */
1439 NTSTATUS smb_direct_connection_connect_recv(struct tevent_req *req, int *fd)
1441 struct smb_direct_connection_connect_state *state =
1442 tevent_req_data(req,
1443 struct smb_direct_connection_connect_state);
1444 struct smb_direct_connection *c = state->c;
1449 if (tevent_req_is_nterror(req, &status)) {
1450 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1451 tevent_req_received(req);
1455 DEBUG(0,("%s:%s: sock.fd[%d] sock.tmp_fd[%d]\n",
1456 __location__, __func__, c->sock.fd, c->sock.tmp_fd));
/*
 * Hand the temporary socket end to the caller.
 * NOTE(review): presumably the caller owns and closes *fd from here on,
 * which is why the connection forgets it - confirm against the destructor.
 */
1457 *fd = c->sock.tmp_fd;
1458 c->sock.tmp_fd = -1;
1459 tevent_req_received(req);
1460 return NT_STATUS_OK;
/*
 * Tear the connection down by running smb_direct_connection_destructor()
 * on it.
 *
 * A success status passed in is replaced by
 * NT_STATUS_UNEXPECTED_NETWORK_ERROR first.
 * NOTE(review): the normalized status does not appear to be consumed
 * afterwards - confirm whether it is meant to be stored or reported.
 */
1463 static void smb_direct_connection_disconnect(struct smb_direct_connection *c,
1466 if (NT_STATUS_IS_OK(status)) {
1467 status = NT_STATUS_UNEXPECTED_NETWORK_ERROR;
1470 smb_direct_connection_destructor(c);
/*
 * tevent fd handler installed on c->rdma.cm_channel->fd by
 * smb_direct_connection_setup_events().
 *
 * Fetches one connection manager event with rdma_get_cm_event(). Every
 * path visible in this handler logs and then calls
 * smb_direct_connection_disconnect():
 *  - failure to get the event or a non-zero event status: status mapped
 *    from errno
 *  - RDMA_CM_EVENT_DISCONNECTED: NT_STATUS_CONNECTION_DISCONNECTED
 *  - the other listed event types: NT_STATUS_INVALID_NETWORK_RESPONSE
 *
 * private_data is the struct smb_direct_connection.
 */
1473 static void smb_direct_connection_rdma_handler(struct tevent_context *ev,
1474 struct tevent_fd *fde,
1478 struct smb_direct_connection *c =
1479 talloc_get_type_abort(private_data,
1480 struct smb_direct_connection);
1481 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1486 ret = rdma_get_cm_event(c->rdma.cm_channel,
1489 status = map_nt_error_from_unix_common(errno);
1490 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1491 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1492 smb_direct_connection_disconnect(c, status);
/* the event status is treated as an errno value for the mapping */
1496 if (c->rdma.cm_event->status != 0) {
1497 errno = c->rdma.cm_event->status;
1498 status = map_nt_error_from_unix_common(errno);
1499 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1500 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1501 smb_direct_connection_disconnect(c, status);
1505 switch (c->rdma.cm_event->event) {
1506 case RDMA_CM_EVENT_DISCONNECTED:
1507 status = NT_STATUS_CONNECTION_DISCONNECTED;
1508 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1509 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1510 smb_direct_connection_disconnect(c, status);
/* none of the following events is accepted once this handler is active */
1512 case RDMA_CM_EVENT_ADDR_RESOLVED:
1513 case RDMA_CM_EVENT_ADDR_ERROR:
1514 case RDMA_CM_EVENT_ROUTE_RESOLVED:
1515 case RDMA_CM_EVENT_ESTABLISHED:
1516 case RDMA_CM_EVENT_ROUTE_ERROR:
1517 case RDMA_CM_EVENT_CONNECT_REQUEST:
1518 case RDMA_CM_EVENT_CONNECT_RESPONSE:
1519 case RDMA_CM_EVENT_CONNECT_ERROR:
1520 case RDMA_CM_EVENT_UNREACHABLE:
1521 case RDMA_CM_EVENT_REJECTED:
1522 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1523 case RDMA_CM_EVENT_MULTICAST_JOIN:
1524 case RDMA_CM_EVENT_MULTICAST_ERROR:
1525 case RDMA_CM_EVENT_ADDR_CHANGE:
1526 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1527 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1528 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1529 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1530 smb_direct_connection_disconnect(c, status);
/* NOTE(review): presumably reached for event values not listed above - confirm */
1534 status = NT_STATUS_INTERNAL_ERROR;
1535 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1536 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1537 smb_direct_connection_disconnect(c, status);
/*
 * tevent fd handler installed on c->ibv.comp_channel->fd by
 * smb_direct_connection_setup_events().
 *
 * Fetches and acknowledges a completion queue event, requests the next
 * notification with ibv_req_notify_cq(), polls the queue with
 * ibv_poll_cq() and dispatches on wc.opcode. The error paths visible
 * here log and call smb_direct_connection_disconnect().
 *
 * Send completions recycle the io into c->s2r.idle. Receive completions
 * are validated, their credit fields are applied to c->state, and
 * payload carrying messages are queued on c->r2s.ready for the socket
 * writer.
 *
 * private_data is the struct smb_direct_connection.
 */
1540 static void smb_direct_connection_ibv_handler(struct tevent_context *ev,
1541 struct tevent_fd *fde,
1545 struct smb_direct_connection *c =
1546 talloc_get_type_abort(private_data,
1547 struct smb_direct_connection);
1548 struct ibv_cq *cq = NULL;
1549 void *cq_context = NULL;
1550 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1553 uint16_t credits_requested;
/* wider than the 16-bit wire field because send_credits is added to it below */
1554 uint32_t credits_granted;
1556 uint32_t data_offset;
1557 struct smb_direct_io *io = NULL;
1560 ret = ibv_get_cq_event(c->ibv.comp_channel,
1563 status = map_nt_error_from_unix_common(errno);
1564 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1565 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1566 smb_direct_connection_disconnect(c, status);
1570 ibv_ack_cq_events(cq, 1);
/* the completion queue context is expected to be this connection */
1572 if (cq_context != c) {
1573 status = NT_STATUS_INTERNAL_ERROR;
1574 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1575 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1576 smb_direct_connection_disconnect(c, status);
1581 ret = ibv_req_notify_cq(cq, 0);
1583 status = map_nt_error_from_unix_common(errno);
1584 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1585 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1586 smb_direct_connection_disconnect(c, status);
1592 DEBUG(0,("%s:%s: try[%d] ret[%d] errno[%d] status[%s]\n",
1593 __location__, __FUNCTION__, try, ret, errno, nt_errstr(status)));
1596 ret = ibv_poll_cq(cq, 1, &wc);
1597 if (ret == 0 && try > 1) {
1599 * The queue is empty...
1604 status = map_nt_error_from_unix_common(errno);
1605 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1606 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1607 smb_direct_connection_disconnect(c, status);
/*
 * Flushed work request: both channel fd events are freed and the
 * connection manager handler is invoked directly with flags 0.
 * NOTE(review): presumably to pick up a pending disconnect event - confirm.
 */
1612 if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1614 status = map_nt_error_from_unix_common(errno);
1615 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1616 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1617 ibv_wc_status_str(wc.status)));
1618 TALLOC_FREE(c->ibv.fde_channel);
1619 TALLOC_FREE(c->rdma.fde_channel);
1620 smb_direct_connection_rdma_handler(ev, fde, 0 /* flags */, private_data);
1623 if (wc.status != IBV_WC_SUCCESS) {
1625 status = map_nt_error_from_unix_common(errno);
1626 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1627 __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1628 ibv_wc_status_str(wc.status)));
1629 smb_direct_connection_disconnect(c, status);
/* wr_id carries the struct smb_direct_io pointer of the completed work request */
1633 io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1634 struct smb_direct_io);
1636 switch (wc.opcode) {
/*
 * Send completion: the io moves from s2r.posted to s2r.idle and
 * smb_direct_connection_setup_readv() is called.
 */
1638 DEBUG(0,("%s:%s: GOT SEND[%p] data_length[%u] remaining_length[%u] ret[%d] errno[%d]\n",
1639 __location__, __FUNCTION__, io,
1640 (unsigned)io->data_length, (unsigned)io->remaining_length, ret, errno));
1641 DLIST_REMOVE(c->s2r.posted, io);
1642 DLIST_ADD_END(c->s2r.idle, io);
1645 ret = smb_direct_connection_setup_readv(c);
1647 status = map_nt_error_from_unix_common(errno);
1648 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1649 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1650 smb_direct_connection_disconnect(c, status);
/*
 * Receive completion: the received length must lie between
 * SMB_DIRECT_DATA_MIN_HDR_SIZE and c->state.max_receive_size.
 */
1657 DEBUG(0,("%s:%s: GOT RECV[%p] ret[%d] errno[%d]\n",
1658 __location__, __FUNCTION__, io, ret, errno));
1659 if (wc.byte_len > c->state.max_receive_size) {
1660 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1661 DEBUG(0,("%s:%s: wc.byte_len[%u/0x%x] max_receive_size[%u/0x%x] ret[%d] errno[%d] status[%s]\n",
1662 __location__, __FUNCTION__,
1663 (unsigned)wc.byte_len,
1664 (unsigned)wc.byte_len,
1665 (unsigned)c->state.max_receive_size,
1666 (unsigned)c->state.max_receive_size,
1667 ret, errno, nt_errstr(status)));
1668 smb_direct_connection_disconnect(c, status);
1671 if (wc.byte_len < SMB_DIRECT_DATA_MIN_HDR_SIZE) {
1672 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1673 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1674 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1675 smb_direct_connection_disconnect(c, status);
1678 DLIST_REMOVE(c->r2s.posted, io);
1679 //dump_data(0, io->smbd_hdr, MIN(wc.byte_len, sizeof(io->smbd_hdr)));
/*
 * Header fields are read from io->smbd_hdr at fixed offsets:
 * 0x00 credits requested, 0x02 credits granted, 0x04 flags,
 * 0x08 remaining length, 0x0C data offset, 0x10 data length.
 */
1680 credits_requested = SVAL(io->smbd_hdr, 0x00);
1681 if (credits_requested == 0) {
1682 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1683 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1684 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1685 smb_direct_connection_disconnect(c, status);
1688 credits_granted = SVAL(io->smbd_hdr, 0x02);
1689 flags = SVAL(io->smbd_hdr, 0x04);
1690 io->remaining_length = IVAL(io->smbd_hdr, 0x08);
1691 data_offset = IVAL(io->smbd_hdr, 0x0C);
1692 io->data_length = IVAL(io->smbd_hdr, 0x10);
/* one receive credit is subtracted; the requested credits become the new target */
1694 c->state.receive_credits -= 1;
1695 c->state.receive_credit_target = credits_requested;
/* granted credits add to the current send credits; a sum above the target is rejected */
1697 credits_granted += c->state.send_credits;
1698 if (credits_granted > c->state.send_credit_target) {
1699 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1700 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1701 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1702 smb_direct_connection_disconnect(c, status);
1705 c->state.send_credits = credits_granted;
1707 smb_direct_connection_debug_credits(c, __location__, __func__);
/*
 * data_offset == 0: a message without payload. It must be exactly the
 * minimal header; the io returns to r2s.idle and
 * smb_direct_connection_post_recv() is called.
 */
1708 if (data_offset == 0) {
1709 if (wc.byte_len != SMB_DIRECT_DATA_MIN_HDR_SIZE) {
1710 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1711 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1712 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1713 smb_direct_connection_disconnect(c, status);
1716 DLIST_ADD_END(c->r2s.idle, io);
1718 ret = smb_direct_connection_post_recv(c);
1720 status = map_nt_error_from_unix_common(errno);
1721 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1722 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1723 smb_direct_connection_disconnect(c, status);
/* payload at the fixed offset: data_length must fit into max_receive_size - data_offset */
1728 } else if (data_offset == SMB_DIRECT_DATA_OFFSET) {
1729 if (io->data_length > (c->state.max_receive_size - data_offset)) {
1730 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1731 DEBUG(0,("%s:%s: io->data_length[%u/0x%x] max_receive_size-data_offset[%u/0x%x] ret[%d] errno[%d] status[%s]\n",
1732 __location__, __FUNCTION__,
1733 (unsigned)io->data_length,
1734 (unsigned)io->data_length,
1735 (unsigned)c->state.max_receive_size - data_offset,
1736 (unsigned)c->state.max_receive_size - data_offset,
1737 ret, errno, nt_errstr(status)));
1738 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1739 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1740 smb_direct_connection_disconnect(c, status);
/* NOTE(review): presumably the branch for any other data_offset value - confirm */
1744 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1745 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1746 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1747 smb_direct_connection_disconnect(c, status);
/*
 * r2s.remaining_length > 0: a fragmented message is in progress. This
 * fragment may not be larger than what is outstanding, and after
 * subtracting it, may not announce more remaining data than is left.
 */
1751 if (c->r2s.remaining_length > 0) {
1752 if (io->data_length > c->r2s.remaining_length) {
1753 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1754 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1755 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1756 smb_direct_connection_disconnect(c, status);
1760 c->r2s.remaining_length -= io->data_length;
1763 if (io->remaining_length > c->r2s.remaining_length) {
1764 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1765 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1766 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1767 smb_direct_connection_disconnect(c, status);
/* continuation fragments are forwarded as payload only */
1771 io->iov = io->_iov_array;
1772 io->iov[0].iov_base = io->data;
1773 io->iov[0].iov_len = io->data_length;
1775 DEBUG(0,("%s:%s: CONTINUE[%p] io->data_length[%u] io->remaining_length[%u]\n",
1776 __location__, __func__, io,
1777 (unsigned)io->data_length,
1778 (unsigned)io->remaining_length));
/*
 * First fragment of a message: the total length is passed to
 * _smb_setlen_tcp() together with io->nbt_hdr, and that header is placed
 * in front of the payload in the iovec.
 * NOTE(review): if both length fields are 32-bit the sum is computed in
 * 32 bits before being widened to uint64_t - confirm the field types.
 * NOTE(review): this check uses >= while the socket handler compares the
 * NBT length with > against max_fragmented_size - confirm which is meant.
 */
1780 uint64_t total_length = io->data_length + io->remaining_length;
1782 if (total_length >= c->state.max_fragmented_size) { //correct direction
1783 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1784 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1785 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1786 smb_direct_connection_disconnect(c, status);
1790 _smb_setlen_tcp(io->nbt_hdr, total_length);
1791 io->iov = io->_iov_array;
1792 io->iov[0].iov_base = io->nbt_hdr;
1793 io->iov[0].iov_len = sizeof(io->nbt_hdr);
1794 io->iov[1].iov_base = io->data;
1795 io->iov[1].iov_len = io->data_length;
1797 DEBUG(0,("%s:%s: START[%p] total_length[%u] io->data_length[%u] io->remaining_length[%u]\n",
1798 __location__, __func__, io, (unsigned)total_length,
1799 (unsigned)io->data_length,
1800 (unsigned)io->remaining_length));
1802 c->r2s.remaining_length = io->remaining_length;;
/* no receive credits left: smb_direct_connection_post_keep() is called */
1805 if (c->state.receive_credits == 0) {
1806 ret = smb_direct_connection_post_keep(c);
1808 status = map_nt_error_from_unix_common(errno);
1809 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1810 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1811 smb_direct_connection_disconnect(c, status);
/*
 * Any flag bit other than SMB_DIRECT_RESPONSE_REQUESTED is an error.
 * NOTE(review): status is derived from errno here although no call
 * visibly failed; the other protocol checks in this handler use
 * NT_STATUS_INVALID_NETWORK_RESPONSE - looks unintended, confirm.
 */
1816 if (flags & ~SMB_DIRECT_RESPONSE_REQUESTED) {
1817 status = map_nt_error_from_unix_common(errno);
1818 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1819 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1820 smb_direct_connection_disconnect(c, status);
/* the peer set the response-requested flag: smb_direct_connection_post_keep() is called */
1824 if (flags & SMB_DIRECT_RESPONSE_REQUESTED) {
1825 ret = smb_direct_connection_post_keep(c);
1827 status = map_nt_error_from_unix_common(errno);
1828 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1829 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1830 smb_direct_connection_disconnect(c, status);
/* queue the io on r2s.ready and call smb_direct_connection_setup_writev() */
1835 DLIST_ADD_END(c->r2s.ready, io);
1837 ret = smb_direct_connection_setup_writev(c);
1839 status = map_nt_error_from_unix_common(errno);
1840 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1841 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1842 smb_direct_connection_disconnect(c, status);
/* RDMA read/write completions are not expected here and end the connection */
1848 case IBV_WC_RDMA_READ:
1849 case IBV_WC_RDMA_WRITE:
1851 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1852 DEBUG(0,("%s:%s: GOT OPCODE[%u] IO[%p] ret[%d] errno[%d] status[%s]\n",
1853 __location__, __FUNCTION__, wc.opcode, io, ret, errno, nt_errstr(status)));
1854 smb_direct_connection_disconnect(c, status);
/*
 * tevent fd handler installed on c->sock.fd by
 * smb_direct_connection_setup_events().
 *
 * TEVENT_FD_WRITE: the iovec of an io is written to the socket with
 * writev(); a fully written io returns to c->r2s.idle and
 * smb_direct_connection_post_recv() is called.
 *
 * TEVENT_FD_READ: data is read from the socket with readv() into an io
 * taken from c->s2r.idle. A new message starts with its NBT header; the
 * payload is cut into fragments of at most
 * max_send_size - SMB_DIRECT_DATA_OFFSET bytes. A complete fragment is
 * queued on c->s2r.ready and smb_direct_connection_post_send() is called.
 *
 * The error paths visible here log and call
 * smb_direct_connection_disconnect().
 *
 * private_data is the struct smb_direct_connection.
 */
1859 static void smb_direct_connection_sock_handler(struct tevent_context *ev,
1860 struct tevent_fd *fde,
1864 struct smb_direct_connection *c =
1865 talloc_get_type_abort(private_data,
1866 struct smb_direct_connection);
1867 NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1868 struct smb_direct_io *io = NULL;
1872 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* ---- socket is writable: forward received data to the socket ---- */
1874 if (fde_flags & TEVENT_FD_WRITE) {
1875 if (c->r2s.out != NULL) {
1876 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1879 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1882 DLIST_REMOVE(c->r2s.ready, io);
1890 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1891 sret = writev(c->sock.fd, io->iov, io->iov_count);
/*
 * NOTE(review): the log text below says TEVENT_FD_NOT_WRITEABLE while the
 * macro invoked is TEVENT_FD_WRITEABLE - the message looks wrong, confirm.
 */
1893 if (errno == EAGAIN) { // and more...
1894 DEBUG(0,("%s:%s: TEVENT_FD_NOT_WRITEABLE writev...\n", __location__, __func__));
1895 TEVENT_FD_WRITEABLE(c->sock.fde);
1899 status = map_nt_error_from_unix_common(errno);
1900 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1901 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1902 smb_direct_connection_disconnect(c, status);
/* account for a possibly partial write */
1906 ok = iov_advance(&io->iov, &io->iov_count, sret);
1908 status = map_nt_error_from_unix_common(errno);
1909 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1910 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1911 smb_direct_connection_disconnect(c, status);
/* everything was written: the io can be reused */
1915 if (io->iov_count == 0) {
1917 DLIST_ADD_END(c->r2s.idle, io);
1918 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1922 DEBUG(0,("%s:%s: TEVENT_FD_NOT_WRITEABLE\n", __location__, __func__));
1923 TEVENT_FD_NOT_WRITEABLE(c->sock.fde);
1928 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1929 ret = smb_direct_connection_post_recv(c);
1931 status = map_nt_error_from_unix_common(errno);
1932 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1933 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1934 smb_direct_connection_disconnect(c, status);
/* ---- socket is readable: collect data to be sent ---- */
1939 if (fde_flags & TEVENT_FD_READ) {
1940 if (c->s2r.in != NULL) {
1941 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1944 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1947 DLIST_REMOVE(c->s2r.idle, io);
1950 if (c->s2r.remaining_length > 0) {
1951 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1953 * We need to continue to get
1954 * the incomplete packet.
1956 io->data_length = MIN(c->state.max_send_size - SMB_DIRECT_DATA_OFFSET,
1957 c->s2r.remaining_length);
1958 io->remaining_length = c->s2r.remaining_length;
1959 io->remaining_length -= io->data_length;
1960 c->s2r.remaining_length = io->remaining_length;
1962 io->iov = io->_iov_array;
1963 io->iov[0].iov_base = io->data;
1964 io->iov[0].iov_len = io->data_length;
1966 DEBUG(0,("%s:%s: CONTINUE[%p] io->data_length[%u] io->remaining_length[%u]\n",
1967 __location__, __func__, io,
1968 (unsigned)io->data_length,
1969 (unsigned)io->remaining_length));
1971 DEBUG(0,("%s:%s: WAIT[%p] for NBT\n", __location__, __func__, io));
1973 * For a new packet we need to get the length
1976 io->data_length = 0;
1977 io->remaining_length = 0;
1979 io->iov = io->_iov_array;
1980 io->iov[0].iov_base = io->nbt_hdr;
1981 io->iov[0].iov_len = sizeof(io->nbt_hdr);
1989 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1990 sret = readv(c->sock.fd, io->iov, io->iov_count);
1992 if (errno == EAGAIN) { // and more...
1993 DEBUG(0,("%s:%s: TEVENT_FD_READABLE...\n", __location__, __func__));
1994 TEVENT_FD_READABLE(c->sock.fde);
1998 status = map_nt_error_from_unix_common(errno);
1999 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2000 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
2001 smb_direct_connection_disconnect(c, status);
/* NOTE(review): presumably the end-of-file case of readv() - confirm */
2005 status = NT_STATUS_CONNECTION_DISCONNECTED;
2006 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2007 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
2008 smb_direct_connection_disconnect(c, status);
/* account for a possibly partial read */
2012 ok = iov_advance(&io->iov, &io->iov_count, sret);
2014 status = map_nt_error_from_unix_common(errno);
2015 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2016 __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
2017 smb_direct_connection_disconnect(c, status);
/*
 * The iovec is exhausted. data_length != 0 means a payload fragment is
 * complete; data_length == 0 means only the NBT header was read so far.
 */
2021 if (io->iov_count == 0) {
2022 if (io->data_length != 0) {
2023 DEBUG(0,("%s:%s: FINISH[%p] io->data_length[%u] io->remaining_length[%u]\n",
2024 __location__, __func__, io, (unsigned)io->data_length, (unsigned)io->remaining_length));
2026 * We managed to read the whole fragment
2027 * which is ready to be posted into the
2031 DLIST_ADD_END(c->s2r.ready, io);
2035 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* the NBT header yields the message length, which is limited by max_fragmented_size */
2036 c->s2r.remaining_length = smb_len_tcp(io->nbt_hdr);
2037 if (c->s2r.remaining_length > c->state.max_fragmented_size) { //correct direction
2038 status = NT_STATUS_INVALID_BUFFER_SIZE;
2039 DEBUG(0,("%s:%s: remaining[%u] max_fragmented[%u] ret[%d] errno[%d] status[%s]\n",
2040 __location__, __FUNCTION__,
2041 (unsigned)c->s2r.remaining_length,
2042 (unsigned)c->state.max_fragmented_size,
2043 ret, errno, nt_errstr(status)));
2044 smb_direct_connection_disconnect(c, status);
/* size the first fragment and remember what is left for later fragments */
2048 io->data_length = MIN(c->state.max_send_size - SMB_DIRECT_DATA_OFFSET,
2049 c->s2r.remaining_length);
2050 io->remaining_length = c->s2r.remaining_length;
2051 io->remaining_length -= io->data_length;
2053 io->iov = io->_iov_array;
2054 io->iov[0].iov_base = io->data;
2055 io->iov[0].iov_len = io->data_length;
2058 DEBUG(0,("%s:%s: AFTER[%p] total[%u] io->data_length[%u] io->remaining_length[%u]\n",
2059 __location__, __func__, io, (unsigned)c->s2r.remaining_length,
2060 (unsigned)io->data_length,
2061 (unsigned)io->remaining_length));
2062 c->s2r.remaining_length = io->remaining_length;
2064 * try to read the reset immediately.
2066 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2070 DEBUG(0,("%s:%s: TEVENT_FD_NOT_READABLE...\n", __location__, __func__));
2071 TEVENT_FD_NOT_READABLE(c->sock.fde);
2076 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2077 ret = smb_direct_connection_post_send(c);
2079 status = map_nt_error_from_unix_common(errno);
2080 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2081 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2082 smb_direct_connection_disconnect(c, status);
/*
 * Install the fd handlers of the connection on an event context.
 *
 * c:  the connection
 * ev: event context the three fd events are added to
 *
 * Registers smb_direct_connection_rdma_handler() on the connection
 * manager channel, smb_direct_connection_ibv_handler() on the completion
 * channel and smb_direct_connection_sock_handler() on c->sock.fd.
 *
 * Return values visible in this function:
 *  NT_STATUS_CONNECTION_DISCONNECTED  c->rdma.cm_channel is NULL
 *  NT_STATUS_OK                       ev equals c->last_ev, or the
 *                                     handlers were added
 *  NT_STATUS_INVALID_PARAMETER_MIX    a different c->last_ev is already set
 *  NT_STATUS_NO_MEMORY                tevent_add_fd() failed; fd events
 *                                     added earlier in this call are freed
 */
2087 NTSTATUS smb_direct_connection_setup_events(struct smb_direct_connection *c,
2088 struct tevent_context *ev)
/* the socket is always watched for reading; writing only if r2s.out is set */
2090 uint16_t sock_fde_flags = TEVENT_FD_READ;
2092 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2093 if (c->r2s.out != NULL) {
2094 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2095 sock_fde_flags |= TEVENT_FD_WRITE;
2098 if (c->rdma.cm_channel == NULL) {
2099 return NT_STATUS_CONNECTION_DISCONNECTED;
/* a channel fd event without any flags set causes all three fd events to be freed */
2102 if (tevent_fd_get_flags(c->ibv.fde_channel) == 0) {
2104 TALLOC_FREE(c->sock.fde);
2105 TALLOC_FREE(c->ibv.fde_channel);
2106 TALLOC_FREE(c->rdma.fde_channel);
2107 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2110 if (tevent_fd_get_flags(c->rdma.fde_channel) == 0) {
2112 TALLOC_FREE(c->sock.fde);
2113 TALLOC_FREE(c->ibv.fde_channel);
2114 TALLOC_FREE(c->rdma.fde_channel);
2115 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2119 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
/* NOTE(review): presumably the path that only removes the handlers - confirm the condition */
2121 TALLOC_FREE(c->sock.fde);
2122 TALLOC_FREE(c->ibv.fde_channel);
2123 TALLOC_FREE(c->rdma.fde_channel);
2124 return NT_STATUS_OK;
/* nothing to do if the handlers already live on this event context */
2127 if (ev == c->last_ev) {
2128 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2129 return NT_STATUS_OK;
2132 if (c->last_ev != NULL) {
2133 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2134 return NT_STATUS_INVALID_PARAMETER_MIX;
2137 c->rdma.fde_channel = tevent_add_fd(ev, c,
2138 c->rdma.cm_channel->fd,
2140 smb_direct_connection_rdma_handler,
2142 if (c->rdma.fde_channel == NULL) {
2143 return NT_STATUS_NO_MEMORY;
2145 c->ibv.fde_channel = tevent_add_fd(ev, c,
2146 c->ibv.comp_channel->fd,
2148 smb_direct_connection_ibv_handler,
2150 if (c->ibv.fde_channel == NULL) {
2151 TALLOC_FREE(c->rdma.fde_channel);
2152 return NT_STATUS_NO_MEMORY;
2154 c->sock.fde = tevent_add_fd(ev, c, c->sock.fd,
2156 smb_direct_connection_sock_handler,
2158 if (c->sock.fde == NULL) {
2159 TALLOC_FREE(c->rdma.fde_channel);
2160 TALLOC_FREE(c->ibv.fde_channel);
2161 return NT_STATUS_NO_MEMORY;
2164 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2166 return NT_STATUS_OK;
/*
 * Create a connection object around an existing rdma_cm_id.
 *
 * mem_ctx: talloc parent of the new connection
 * cm_id:   stored in c->rdma.cm_id
 *
 * Allocates a zeroed struct smb_direct_connection, installs
 * smb_direct_connection_destructor() on it, fills in the default limits,
 * creates an AF_UNIX stream socketpair (sfd[0] becomes sock.tmp_fd,
 * sfd[1] becomes the non-blocking sock.fd) and creates a non-blocking
 * RDMA event channel.
 *
 * Returns the connection.
 * NOTE(review): presumably NULL is returned when the allocation, the
 * socketpair or the event channel creation fails - confirm.
 */
2169 struct smb_direct_connection *smb_direct_connection_listener(
2170 TALLOC_CTX *mem_ctx,
2171 struct rdma_cm_id *cm_id)
2173 struct smb_direct_connection *c;
2178 c = talloc_zero(mem_ctx, struct smb_direct_connection);
/* mark the descriptor as unset before the destructor is installed */
2183 c->sock.tmp_fd = -1;
2184 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2185 talloc_set_destructor(c, smb_direct_connection_destructor);
/* local defaults for the connection limits and credits */
2187 c->state.max_send_size = 1364;
2188 c->state.max_receive_size = SMB_DIRECT_IO_MAX_DATA;
2189 c->state.max_fragmented_size = 1048576;
2190 c->state.max_read_write_size = 0;
2191 c->state.receive_credit_max = 10;//255;
2192 c->state.send_credit_target = 255;
2193 c->state.keep_alive_internal = 5;
2195 ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sfd);
/* errno is preserved across the logging call */
2197 int saved_errno = errno;
2199 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2200 errno = saved_errno;
2203 c->sock.tmp_fd = sfd[0];
2204 c->sock.fd = sfd[1];
2206 DEBUG(0,("%s:%s: sock.fd[%d] sock.tmp_fd[%d]\n",
2207 __location__, __func__, c->sock.fd, c->sock.tmp_fd));
/* both ends are close-on-exec; only the internal end is made non-blocking */
2209 smb_set_close_on_exec(c->sock.tmp_fd);
2210 smb_set_close_on_exec(c->sock.fd);
2211 set_blocking(c->sock.fd, false);
2213 c->rdma.cm_channel = rdma_create_event_channel();
2214 if (c->rdma.cm_channel == NULL) {
2216 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2219 smb_set_close_on_exec(c->rdma.cm_channel->fd);
2220 set_blocking(c->rdma.cm_channel->fd, false);
2222 c->rdma.cm_id = cm_id;
/*
 * Accessor: returns c->state.max_fragmented_size as currently stored in
 * the connection state.
 */
2227 uint32_t smb_direct_connection_max_fragmented_size(struct smb_direct_connection *c)
2229 return c->state.max_fragmented_size;
/*
 * Accessor: returns c->state.max_read_write_size as currently stored in
 * the connection state.
 */
2232 uint32_t smb_direct_connection_max_read_write_size(struct smb_direct_connection *c)
2234 return c->state.max_read_write_size;
2236 #endif /* SMB_TRANSPORT_ENABLE_RDMA */