2 Unix SMB/CIFS implementation.
3 Infrastructure for SMB-Direct RDMA as transport
4 Copyright (C) Stefan Metzmacher 2012
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/network.h"
23 #include "../util/tevent_ntstatus.h"
24 #include "../lib/tsocket/tsocket.h"
26 #include "lib/util/util_net.h" //TODO
28 #ifdef SMB_TRANSPORT_ENABLE_RDMA
29 #include <rdma/rdma_cma_abi.h>
30 #include <rdma/rdma_cma.h>
31 #include <infiniband/verbs.h>
/*
 * Per-connection SMB-Direct transport state.
 *
 * NOTE(review): this extract is missing lines (numbering gaps; two
 * members are both named "fde_channel").  Later code accesses these as
 * t->rdma.* and t->ibv.*, so the members below presumably live in two
 * nested anonymous-or-named sub-structs ("rdma" for librdmacm
 * connection-manager state, "ibv" for libibverbs completion state)
 * whose header/footer lines are not visible -- confirm against the
 * complete source.  The destructor also touches t->ibv.qp, t->ibv.cq
 * and t->ibv.pd, which do not appear here at all.
 */
33 struct smb_direct_transport {
/* rdma.*: librdmacm connection-manager state */
35 struct rdma_cm_id *cm_id;
36 struct rdma_event_channel *cm_channel;
/* tevent watcher on cm_channel->fd */
37 struct tevent_fd *fde_channel;
/* next CM event the state machine expects; anything else is an error */
38 enum rdma_cm_event_type expected_event;
/* last event fetched via rdma_get_cm_event(); acked and cleared after use */
39 struct rdma_cm_event *cm_event;
/* ibv.*: libibverbs completion-notification state */
43 struct ibv_comp_channel *comp_channel;
/* tevent watcher on comp_channel->fd */
44 struct tevent_fd *fde_channel;
/* Forward declarations: destructor is referenced before its definition. */
50 static int smb_direct_transport_destructor(struct smb_direct_transport *t);
52 struct smb_direct_transport *smb_direct_transport_create(TALLOC_CTX *mem_ctx);
/*
 * Allocate a talloc'ed transport, create the librdmacm event channel
 * and (ABI-dependent) the cm_id.  The destructor is installed before
 * any resource is created, so on failure whatever exists is torn down
 * when the caller frees the half-built object.
 *
 * NOTE(review): allocation-failure returns, the rdma_create_id()
 * argument lists, the #else branch and the closing brace are not
 * visible in this extract (numbering gaps) -- do not assume the error
 * handling shown is complete.
 */
54 struct smb_direct_transport *smb_direct_transport_create(TALLOC_CTX *mem_ctx)
56 struct smb_direct_transport *t;
59 t = talloc_zero(mem_ctx, struct smb_direct_transport);
/* install teardown before acquiring RDMA resources */
63 talloc_set_destructor(t, smb_direct_transport_destructor);
65 t->rdma.cm_channel = rdma_create_event_channel();
66 if (t->rdma.cm_channel == NULL) {
/* older/newer librdmacm ABIs take different rdma_create_id() signatures */
71 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
72 ret = rdma_create_id(t->rdma.cm_channel,
76 ret = rdma_create_id(t->rdma.cm_channel,
/*
 * talloc destructor: release verbs/rdmacm resources in roughly the
 * reverse order of their creation -- tevent fd watchers first (so no
 * callback fires mid-teardown), then QP, CQ, completion channel, PD,
 * any not-yet-acked CM event, the cm_id, and finally the event channel.
 * Each pointer is NULLed after release to make double-destruction safe.
 */
88 static int smb_direct_transport_destructor(struct smb_direct_transport *t)
90 TALLOC_FREE(t->ibv.fde_channel);
91 TALLOC_FREE(t->rdma.fde_channel);
93 if (t->ibv.qp != NULL) {
94 ibv_destroy_qp(t->ibv.qp);
98 if (t->ibv.cq != NULL) {
99 ibv_destroy_cq(t->ibv.cq);
103 if (t->ibv.comp_channel != NULL) {
104 ibv_destroy_comp_channel(t->ibv.comp_channel);
105 t->ibv.comp_channel = NULL;
108 if (t->ibv.pd != NULL) {
109 ibv_dealloc_pd(t->ibv.pd);
/* a fetched-but-unacked CM event pins resources; ack before destroying ids */
113 if (t->rdma.cm_event != NULL) {
114 rdma_ack_cm_event(t->rdma.cm_event);
115 t->rdma.cm_event = NULL;
118 if (t->rdma.cm_id != NULL) {
119 rdma_destroy_id(t->rdma.cm_id);
120 t->rdma.cm_id = NULL;
123 if (t->rdma.cm_channel != NULL) {
124 rdma_destroy_event_channel(t->rdma.cm_channel);
125 t->rdma.cm_channel = NULL;
/*
 * Per-request state for the async rdma-connect tevent_req; holds only
 * the transport being connected (the CM state machine itself lives in
 * transport->rdma).
 */
131 struct smb_direct_rdma_connect_state {
132 struct smb_direct_transport *t;
/*
 * Async connect pair: _send() starts address resolution, _recv()
 * collapses the finished tevent_req into an NTSTATUS.
 * NOTE(review): the handler's trailing parameters (flags/private_data)
 * are missing from this extract.
 */
135 struct tevent_req *smb_direct_rdma_connect_send(TALLOC_CTX *mem_ctx,
136 struct tevent_context *ev,
137 struct smb_direct_transport *transport,
138 const struct sockaddr_storage *addr,
139 struct tsocket_address *local_addr,
140 struct tsocket_address *remote_addr);
141 NTSTATUS smb_direct_rdma_connect_recv(struct tevent_req *req);
143 static void smb_direct_rdma_connect_handler(struct tevent_context *ev,
144 struct tevent_fd *fde,
/*
 * Kick off an asynchronous RDMA connect: copy the caller's address,
 * force the SMB-Direct port (5445), watch the CM event channel fd via
 * tevent, and start rdma_resolve_addr().  Progress then happens in
 * smb_direct_rdma_connect_handler(), which walks the CM event chain
 * ADDR_RESOLVED -> ROUTE_RESOLVED -> ESTABLISHED.
 *
 * NOTE(review): several lines are missing here (declaration of `ret`,
 * the rdma_resolve_addr() argument tail, error returns, the final
 * `return req`).  local_addr/remote_addr are accepted but no use is
 * visible in this extract -- confirm against the full source.
 */
148 struct tevent_req *smb_direct_rdma_connect_send(TALLOC_CTX *mem_ctx,
149 struct tevent_context *ev,
150 struct smb_direct_transport *transport,
151 const struct sockaddr_storage *_addr,
152 struct tsocket_address *local_addr,
153 struct tsocket_address *remote_addr)
155 struct tevent_req *req;
156 struct smb_direct_rdma_connect_state *state;
/* work on a local copy so the caller's sockaddr is not modified */
158 struct sockaddr_storage addr = *_addr;
159 struct sockaddr *src_addr = NULL, *dst_addr = &addr;
/* 5445 is the SMB-Direct listener port */
161 set_sockaddr_port(dst_addr, 5445);
163 req = tevent_req_create(mem_ctx, &state,
164 struct smb_direct_rdma_connect_state);
168 state->t = transport;
/* fire the handler whenever a CM event is readable on the channel fd */
170 transport->rdma.fde_channel = tevent_add_fd(ev, transport,
171 transport->rdma.cm_channel->fd,
173 smb_direct_rdma_connect_handler,
175 if (tevent_req_nomem(transport->rdma.fde_channel, req)) {
176 return tevent_req_post(req, ev);
180 ret = rdma_resolve_addr(state->t->rdma.cm_id,
184 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
185 __location__, __FUNCTION__, ret, errno));
/* first event we expect back from the CM is address resolution */
187 state->t->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
/*
 * CM event handler driving the connect state machine.  One event is
 * fetched per callback; it must be error-free and match the expected
 * event recorded by the previous step:
 *
 *   ADDR_RESOLVED   -> rdma_resolve_route(), expect ROUTE_RESOLVED
 *   ROUTE_RESOLVED  -> allocate PD, completion channel, CQ and RC QP,
 *                      then rdma_connect(), expect ESTABLISHED
 *   ESTABLISHED     -> connection up: stop watching the CM fd and
 *                      complete the tevent_req; expect DISCONNECTED next
 *   anything else   -> error path (bodies not visible in this extract)
 *
 * The fetched CM event is always acked before returning so librdmacm
 * can reuse it.
 *
 * NOTE(review): this extract is missing many lines -- error returns,
 * `break` statements, ZERO_STRUCT of wc/etc., the rdma_create_qp()
 * argument tail, and the handler's flags/private_data parameters.
 * Statement order below must be treated as incomplete.
 */
192 static void smb_direct_rdma_connect_handler(struct tevent_context *ev,
193 struct tevent_fd *fde,
197 struct tevent_req *req =
198 talloc_get_type_abort(private_data,
200 struct smb_direct_rdma_connect_state *state =
202 struct smb_direct_rdma_connect_state);
203 struct ibv_qp_init_attr init_attr;
204 struct rdma_conn_param conn_param;
/* pull exactly one event off the CM channel */
209 ret = rdma_get_cm_event(state->t->rdma.cm_channel,
210 &state->t->rdma.cm_event);
212 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
213 __location__, __FUNCTION__, ret, errno));
/* an event carrying a non-zero status is a failure */
216 if (state->t->rdma.cm_event->status != 0) {
217 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
218 __location__, __FUNCTION__, ret, errno));
/* out-of-sequence events abort the state machine */
222 if (state->t->rdma.cm_event->event != state->t->rdma.expected_event) {
223 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
224 __location__, __FUNCTION__, ret, errno));
228 switch (state->t->rdma.cm_event->event) {
229 case RDMA_CM_EVENT_ADDR_RESOLVED:
/* address known; resolve the route with a 5000 ms timeout */
231 ret = rdma_resolve_route(state->t->rdma.cm_id, 5000);
233 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
234 __location__, __FUNCTION__, ret, errno));
237 state->t->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
239 case RDMA_CM_EVENT_ADDR_ERROR:
241 case RDMA_CM_EVENT_ROUTE_RESOLVED:
/* route known; build the verbs objects on the cm_id's device */
244 state->t->ibv.pd = ibv_alloc_pd(state->t->rdma.cm_id->verbs);
245 if (state->t->ibv.pd == NULL) {
249 state->t->ibv.comp_channel = ibv_create_comp_channel(state->t->rdma.cm_id->verbs);
250 if (state->t->ibv.comp_channel == NULL) {
253 ZERO_STRUCT(init_attr);
254 init_attr.cap.max_send_wr = 16;
255 init_attr.cap.max_recv_wr = 2;
256 init_attr.cap.max_recv_sge = 1;
257 init_attr.cap.max_send_sge = 1;
258 init_attr.qp_type = IBV_QPT_RC;
/* one CQ shared by send and recv, sized at twice the send queue depth */
260 state->t->ibv.cq = ibv_create_cq(state->t->rdma.cm_id->verbs,
261 init_attr.cap.max_send_wr * 2,
263 state->t->ibv.comp_channel,
265 if (state->t->ibv.cq == NULL) {
268 init_attr.send_cq = state->t->ibv.cq;
269 init_attr.recv_cq = state->t->ibv.cq;
/* arm completion notification before any work is posted */
272 ret = ibv_req_notify_cq(state->t->ibv.cq, 0);
274 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
275 __location__, __FUNCTION__, ret, errno));
281 ret = rdma_create_qp(state->t->rdma.cm_id, state->t->ibv.pd,
284 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
285 __location__, __FUNCTION__, ret, errno));
/* rdma_create_qp() stores the QP on the cm_id; keep our own handle */
288 state->t->ibv.qp = state->t->rdma.cm_id->qp;
290 ZERO_STRUCT(conn_param);
291 conn_param.responder_resources = 1;
292 conn_param.initiator_depth = 1;
293 conn_param.retry_count = 10;
296 ret = rdma_connect(state->t->rdma.cm_id, &conn_param);
298 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
299 __location__, __FUNCTION__, ret, errno));
302 state->t->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
305 case RDMA_CM_EVENT_ESTABLISHED:
308 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
309 __location__, __FUNCTION__, ret, errno));
/* connected: next legitimate CM event would be a disconnect */
311 state->t->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED;
312 TALLOC_FREE(state->t->rdma.fde_channel);
313 tevent_req_done(req);
316 case RDMA_CM_EVENT_ROUTE_ERROR:
317 case RDMA_CM_EVENT_CONNECT_REQUEST:
318 case RDMA_CM_EVENT_CONNECT_RESPONSE:
319 case RDMA_CM_EVENT_CONNECT_ERROR:
320 case RDMA_CM_EVENT_UNREACHABLE:
321 case RDMA_CM_EVENT_REJECTED:
322 case RDMA_CM_EVENT_DISCONNECTED:
323 case RDMA_CM_EVENT_DEVICE_REMOVAL:
324 case RDMA_CM_EVENT_MULTICAST_JOIN:
325 case RDMA_CM_EVENT_MULTICAST_ERROR:
326 case RDMA_CM_EVENT_ADDR_CHANGE:
327 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
/* release the event back to librdmacm */
332 if (state->t->rdma.cm_event != NULL) {
333 rdma_ack_cm_event(state->t->rdma.cm_event);
334 state->t->rdma.cm_event = NULL;
/*
 * Collect the result of smb_direct_rdma_connect_send(): drop the CM fd
 * watcher (idempotent if the handler already freed it), then collapse
 * the tevent_req into an NTSTATUS and release the request.
 * NOTE(review): the `return status;` / `return NT_STATUS_OK;` lines are
 * not visible in this extract.
 */
338 NTSTATUS smb_direct_rdma_connect_recv(struct tevent_req *req)
340 struct smb_direct_rdma_connect_state *state =
342 struct smb_direct_rdma_connect_state);
345 TALLOC_FREE(state->t->rdma.fde_channel);
347 if (tevent_req_is_nterror(req, &status)) {
348 tevent_req_received(req);
352 tevent_req_received(req);
/*
 * Per-request state for the SMB-Direct negotiate exchange.
 *
 * NOTE(review): nested-struct headers are missing from this extract;
 * later code uses state->req.* (outgoing 0x14-byte negotiate request:
 * buffer, registered MR, SGE, send WR) and state->rep.* (incoming
 * 0x1C-byte negotiate response: buffer, MR, SGE, recv WR) -- confirm
 * the exact layout against the full source.
 */
356 struct smb_direct_negotiate_state {
357 struct smb_direct_transport *t;
359 uint8_t buffer[0x14];
362 struct ibv_send_wr wr;
365 uint8_t buffer[0x1C];
368 struct ibv_recv_wr wr;
/*
 * Async negotiate pair plus the two private event handlers (one for the
 * verbs completion channel, one for the rdmacm event channel).
 * NOTE(review): the handlers' trailing parameters are missing here.
 */
372 struct tevent_req *smb_direct_negotiate_send(TALLOC_CTX *mem_ctx,
373 struct tevent_context *ev,
374 struct smb_direct_transport *transport);
375 NTSTATUS smb_direct_negotiate_recv(struct tevent_req *req);
377 static void smb_direct_negotiate_rdma_handler(struct tevent_context *ev,
378 struct tevent_fd *fde,
381 static void smb_direct_negotiate_ibv_handler(struct tevent_context *ev,
382 struct tevent_fd *fde,
/*
 * Start the SMB-Direct negotiate exchange on an established connection:
 * watch both the verbs completion channel and the CM event channel,
 * build the negotiate request in state->req.buffer, register both the
 * request and response buffers as memory regions, post the receive for
 * the response first, then post the send of the request.
 *
 * The buffer layout matches the MS-SMBD Negotiate Request on the wire
 * (little-endian): MinVersion=0x0100, MaxVersion=0x0100, Reserved=0,
 * CreditsRequested=10, PreferredSendSize=0x400, MaxReceiveSize=0x400,
 * MaxFragmentedSize=0x20000 -- presumably; verify against [MS-SMBD]
 * 2.2.1.
 *
 * NOTE(review): error returns, `return req`, the ibv_reg_mr() buffer
 * arguments and the recv MR access flags are missing from this extract.
 * Also note the stray double semicolons after the two sge.length
 * assignments (harmless, but should be cleaned up in the real source).
 */
386 struct tevent_req *smb_direct_negotiate_send(TALLOC_CTX *mem_ctx,
387 struct tevent_context *ev,
388 struct smb_direct_transport *transport)
390 struct tevent_req *req;
391 struct smb_direct_negotiate_state *state;
392 struct ibv_recv_wr *bad_recv_wr = NULL;
393 struct ibv_send_wr *bad_send_wr = NULL;
396 req = tevent_req_create(mem_ctx, &state,
397 struct smb_direct_negotiate_state);
401 state->t = transport;
/* completion-channel watcher: fires when a work completion is ready */
403 transport->ibv.fde_channel = tevent_add_fd(ev, transport,
404 transport->ibv.comp_channel->fd,
406 smb_direct_negotiate_ibv_handler,
408 if (tevent_req_nomem(transport->ibv.fde_channel, req)) {
409 return tevent_req_post(req, ev);
/* CM watcher: catches disconnects etc. during the exchange */
411 transport->rdma.fde_channel = tevent_add_fd(ev, transport,
412 transport->rdma.cm_channel->fd,
414 smb_direct_negotiate_rdma_handler,
416 if (tevent_req_nomem(transport->rdma.fde_channel, req)) {
417 return tevent_req_post(req, ev);
/* marshal the negotiate request (fixed 0x14 bytes, little-endian) */
420 SSVAL(state->req.buffer, 0x00, 0x0100);
421 SSVAL(state->req.buffer, 0x02, 0x0100);
422 SSVAL(state->req.buffer, 0x04, 0x0000);
423 SSVAL(state->req.buffer, 0x06, 0x000A);
424 SIVAL(state->req.buffer, 0x08, 0x00000400);
425 SIVAL(state->req.buffer, 0x0C, 0x00000400);
426 SIVAL(state->req.buffer, 0x10, 0x00020000);
/* register the outgoing buffer so the HCA may read it */
428 state->req.mr = ibv_reg_mr(transport->ibv.pd,
430 sizeof(state->req.buffer),
431 IBV_ACCESS_LOCAL_WRITE);
432 if (tevent_req_nomem(state->req.mr, req)) {
433 return tevent_req_post(req, ev);
436 state->req.sge.addr = (uint64_t) (uintptr_t) state->req.buffer;
437 state->req.sge.length = sizeof(state->req.buffer);;
438 state->req.sge.lkey = state->req.mr->lkey;
439 state->req.wr.opcode = IBV_WR_SEND;
440 state->req.wr.send_flags = IBV_SEND_SIGNALED;
441 state->req.wr.sg_list = &state->req.sge;
442 state->req.wr.num_sge = 1;
/* register the response buffer for the incoming receive */
444 state->rep.mr = ibv_reg_mr(transport->ibv.pd,
446 sizeof(state->rep.buffer),
448 if (tevent_req_nomem(state->rep.mr, req)) {
449 return tevent_req_post(req, ev);
452 state->rep.sge.addr = (uint64_t) (uintptr_t) state->rep.buffer;
453 state->rep.sge.length = sizeof(state->rep.buffer);;
454 state->rep.sge.lkey = state->rep.mr->lkey;
455 state->rep.wr.sg_list = &state->rep.sge;
456 state->rep.wr.num_sge = 1;
/* post the receive BEFORE the send so the response cannot race it */
459 ret = ibv_post_recv(transport->ibv.qp, &state->rep.wr, &bad_recv_wr);
461 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
462 __location__, __FUNCTION__, ret, errno));
467 ret = ibv_post_send(transport->ibv.qp, &state->req.wr, &bad_send_wr);
469 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
470 __location__, __FUNCTION__, ret, errno));
474 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
475 __location__, __FUNCTION__, ret, errno));
/*
 * Completion-channel handler for the negotiate exchange: fetch the CQ
 * event, sanity-check that it belongs to our CQ/context, re-arm
 * notification, poll one work completion and check its status, then
 * dispatch on the completion opcode.  The CQ event is acked at the end.
 *
 * NOTE(review): this extract is missing lines throughout (error
 * returns, most of the opcode switch -- only RDMA_WRITE/RDMA_READ cases
 * and a commented-out re-post of the receive WR are visible, not the
 * IBV_WC_SEND/IBV_WC_RECV handling one would expect here).  Treat the
 * visible control flow as incomplete.
 */
479 static void smb_direct_negotiate_ibv_handler(struct tevent_context *ev,
480 struct tevent_fd *fde,
484 struct tevent_req *req =
485 talloc_get_type_abort(private_data,
487 struct smb_direct_negotiate_state *state =
489 struct smb_direct_negotiate_state);
490 struct ibv_cq *cq = NULL;
491 void *cq_context = NULL;
493 struct ibv_recv_wr *bad_wr;
/* block-free fetch: the fd was readable, so an event is pending */
498 ret = ibv_get_cq_event(state->t->ibv.comp_channel,
501 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
502 __location__, __FUNCTION__, ret, errno));
/* the event must be for our CQ and carry our context pointer */
505 if (cq != state->t->ibv.cq) {
507 if (cq_context != state->t) {
/* re-arm notification before polling to avoid losing the next event */
510 ret = ibv_req_notify_cq(state->t->ibv.cq, 0);
512 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
513 __location__, __FUNCTION__, ret, errno));
516 ret = ibv_poll_cq(state->t->ibv.cq, 1, &wc);
518 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
519 __location__, __FUNCTION__, ret, errno));
524 if (wc.status != IBV_WC_SUCCESS) {
530 case IBV_WC_RDMA_WRITE:
532 case IBV_WC_RDMA_READ:
535 //ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
541 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
542 __location__, __FUNCTION__, ret, errno));
/* every ibv_get_cq_event() must be balanced by an ack */
543 ibv_ack_cq_events(state->t->ibv.cq, 1);
/*
 * CM event handler during the negotiate exchange.  Mirrors the connect
 * handler's validation (fetch one event, require status 0 and the
 * expected event type) but the switch body is empty in this extract --
 * presumably any CM event during negotiate (e.g. DISCONNECTED) is an
 * error; the event is acked before returning.
 *
 * NOTE(review): error returns, case bodies and the handler's trailing
 * parameters are missing from this extract.
 */
546 static void smb_direct_negotiate_rdma_handler(struct tevent_context *ev,
547 struct tevent_fd *fde,
551 struct tevent_req *req =
552 talloc_get_type_abort(private_data,
554 struct smb_direct_negotiate_state *state =
556 struct smb_direct_negotiate_state);
561 ret = rdma_get_cm_event(state->t->rdma.cm_channel,
562 &state->t->rdma.cm_event);
564 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
565 __location__, __FUNCTION__, ret, errno));
568 if (state->t->rdma.cm_event->status != 0) {
569 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
570 __location__, __FUNCTION__, ret, errno));
574 if (state->t->rdma.cm_event->event != state->t->rdma.expected_event) {
575 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
576 __location__, __FUNCTION__, ret, errno));
580 switch (state->t->rdma.cm_event->event) {
581 case RDMA_CM_EVENT_ADDR_RESOLVED:
582 case RDMA_CM_EVENT_ADDR_ERROR:
583 case RDMA_CM_EVENT_ROUTE_RESOLVED:
584 case RDMA_CM_EVENT_ESTABLISHED:
585 case RDMA_CM_EVENT_ROUTE_ERROR:
586 case RDMA_CM_EVENT_CONNECT_REQUEST:
587 case RDMA_CM_EVENT_CONNECT_RESPONSE:
588 case RDMA_CM_EVENT_CONNECT_ERROR:
589 case RDMA_CM_EVENT_UNREACHABLE:
590 case RDMA_CM_EVENT_REJECTED:
591 case RDMA_CM_EVENT_DISCONNECTED:
592 case RDMA_CM_EVENT_DEVICE_REMOVAL:
593 case RDMA_CM_EVENT_MULTICAST_JOIN:
594 case RDMA_CM_EVENT_MULTICAST_ERROR:
595 case RDMA_CM_EVENT_ADDR_CHANGE:
596 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
/* release the event back to librdmacm */
601 if (state->t->rdma.cm_event != NULL) {
602 rdma_ack_cm_event(state->t->rdma.cm_event);
603 state->t->rdma.cm_event = NULL;
/*
 * Collect the result of smb_direct_negotiate_send(): drop both event
 * watchers, then collapse the tevent_req into an NTSTATUS and release
 * the request.  NOTE(review): the `return status;` /
 * `return NT_STATUS_OK;` lines are not visible in this extract.
 */
607 NTSTATUS smb_direct_negotiate_recv(struct tevent_req *req)
609 struct smb_direct_negotiate_state *state =
611 struct smb_direct_negotiate_state);
614 TALLOC_FREE(state->t->ibv.fde_channel);
615 TALLOC_FREE(state->t->rdma.fde_channel);
617 if (tevent_req_is_nterror(req, &status)) {
618 tevent_req_received(req);
622 tevent_req_received(req);
625 #endif /* SMB_TRANSPORT_ENABLE_RDMA */