f949659eebcacae30cc8adf18469668b1d4dd38d
[metze/samba/wip.git] / libcli / smb / smb_transport_direct.c
1 /*
2    Unix SMB/CIFS implementation.
3    Infrastructure for SMB-Direct RDMA as transport
4    Copyright (C) Stefan Metzmacher 2012
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "includes.h"
21 #include "system/network.h"
22 #include <tevent.h>
23 #include "../util/tevent_ntstatus.h"
24 #include "../lib/tsocket/tsocket.h"
25
26 #include "lib/util/util_net.h" //TODO
27
28 #ifdef SMB_TRANSPORT_ENABLE_RDMA
29 #include <rdma/rdma_cma_abi.h>
30 #include <rdma/rdma_cma.h>
31 #include <infiniband/verbs.h>
32
33 struct smb_direct_transport {
34         struct {
35                 struct rdma_cm_id *cm_id;
36                 struct rdma_event_channel *cm_channel;
37                 struct tevent_fd *fde_channel;
38                 enum rdma_cm_event_type expected_event;
39                 struct rdma_cm_event *cm_event;
40         } rdma;
41         struct {
42                 struct ibv_pd *pd;
43                 struct ibv_comp_channel *comp_channel;
44                 struct tevent_fd *fde_channel;
45                 struct ibv_cq *cq;
46                 struct ibv_qp *qp;
47         } ibv;
48 };
49
50 static int smb_direct_transport_destructor(struct smb_direct_transport *t);
51
52 struct smb_direct_transport *smb_direct_transport_create(TALLOC_CTX *mem_ctx);
53
54 struct smb_direct_transport *smb_direct_transport_create(TALLOC_CTX *mem_ctx)
55 {
56         struct smb_direct_transport *t;
57         int ret;
58
59         t = talloc_zero(mem_ctx, struct smb_direct_transport);
60         if (t == NULL) {
61                 return NULL;
62         }
63         talloc_set_destructor(t, smb_direct_transport_destructor);
64
65         t->rdma.cm_channel = rdma_create_event_channel();
66         if (t->rdma.cm_channel == NULL) {
67                 talloc_free(t);
68                 return NULL;
69         }
70
71 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
72         ret = rdma_create_id(t->rdma.cm_channel,
73                              &t->rdma.cm_id,
74                              t, RDMA_PS_TCP);
75 #else
76         ret = rdma_create_id(t->rdma.cm_channel,
77                              &t->rdma.cm_id,
78                              t);
79 #endif
80         if (ret != 0) {
81                 talloc_free(t);
82                 return NULL;
83         }
84
85         return t;
86 }
87
88 static int smb_direct_transport_destructor(struct smb_direct_transport *t)
89 {
90         TALLOC_FREE(t->ibv.fde_channel);
91         TALLOC_FREE(t->rdma.fde_channel);
92
93         if (t->ibv.qp != NULL) {
94                 ibv_destroy_qp(t->ibv.qp);
95                 t->ibv.qp = NULL;
96         }
97
98         if (t->ibv.cq != NULL) {
99                 ibv_destroy_cq(t->ibv.cq);
100                 t->ibv.cq = NULL;
101         }
102
103         if (t->ibv.comp_channel != NULL) {
104                 ibv_destroy_comp_channel(t->ibv.comp_channel);
105                 t->ibv.comp_channel = NULL;
106         }
107
108         if (t->ibv.pd != NULL) {
109                 ibv_dealloc_pd(t->ibv.pd);
110                 t->ibv.pd = NULL;
111         }
112
113         if (t->rdma.cm_event != NULL) {
114                 rdma_ack_cm_event(t->rdma.cm_event);
115                 t->rdma.cm_event = NULL;
116         }
117
118         if (t->rdma.cm_id != NULL) {
119                 rdma_destroy_id(t->rdma.cm_id);
120                 t->rdma.cm_id = NULL;
121         }
122
123         if (t->rdma.cm_channel != NULL) {
124                 rdma_destroy_event_channel(t->rdma.cm_channel);
125                 t->rdma.cm_channel = NULL;
126         }
127
128         return 0;
129 }
130
131 struct smb_direct_rdma_connect_state {
132         struct smb_direct_transport *t;
133 };
134
135 struct tevent_req *smb_direct_rdma_connect_send(TALLOC_CTX *mem_ctx,
136                                         struct tevent_context *ev,
137                                         struct smb_direct_transport *transport,
138                                         const struct sockaddr_storage *addr,
139                                         struct tsocket_address *local_addr,
140                                         struct tsocket_address *remote_addr);
141 NTSTATUS smb_direct_rdma_connect_recv(struct tevent_req *req);
142
143 static void smb_direct_rdma_connect_handler(struct tevent_context *ev,
144                                             struct tevent_fd *fde,
145                                             uint16_t flags,
146                                             void *private_data);
147
148 struct tevent_req *smb_direct_rdma_connect_send(TALLOC_CTX *mem_ctx,
149                                         struct tevent_context *ev,
150                                         struct smb_direct_transport *transport,
151                                         const struct sockaddr_storage *_addr,
152                                         struct tsocket_address *local_addr,
153                                         struct tsocket_address *remote_addr)
154 {
155         struct tevent_req *req;
156         struct smb_direct_rdma_connect_state *state;
157         int ret;
158         struct sockaddr_storage addr = *_addr;
159         struct sockaddr *src_addr = NULL, *dst_addr = &addr;
160
161         set_sockaddr_port(dst_addr, 5445);
162
163         req = tevent_req_create(mem_ctx, &state,
164                                 struct smb_direct_rdma_connect_state);
165         if (req == NULL) {
166                 return NULL;
167         }
168         state->t = transport;
169
170         transport->rdma.fde_channel = tevent_add_fd(ev, transport,
171                                                 transport->rdma.cm_channel->fd,
172                                                 TEVENT_FD_READ,
173                                                 smb_direct_rdma_connect_handler,
174                                                 req);
175         if (tevent_req_nomem(transport->rdma.fde_channel, req)) {
176                 return tevent_req_post(req, ev);
177         }
178
179         errno = 0;
180         ret = rdma_resolve_addr(state->t->rdma.cm_id,
181                                 src_addr, dst_addr,
182                                 5000);
183         if (ret != 0) {
184                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
185                         __location__, __FUNCTION__, ret, errno));
186         }
187         state->t->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
188
189         return req;
190 }
191
192 static void smb_direct_rdma_connect_handler(struct tevent_context *ev,
193                                             struct tevent_fd *fde,
194                                             uint16_t flags,
195                                             void *private_data)
196 {
197         struct tevent_req *req =
198                 talloc_get_type_abort(private_data,
199                 struct tevent_req);
200         struct smb_direct_rdma_connect_state *state =
201                 tevent_req_data(req,
202                 struct smb_direct_rdma_connect_state);
203         struct ibv_qp_init_attr init_attr;
204         struct rdma_conn_param conn_param;
205         int ret;
206
207         errno = 0;
208
209         ret = rdma_get_cm_event(state->t->rdma.cm_channel,
210                                 &state->t->rdma.cm_event);
211         if (ret != 0) {
212                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
213                         __location__, __FUNCTION__, ret, errno));
214         }
215
216         if (state->t->rdma.cm_event->status != 0) {
217                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
218                         __location__, __FUNCTION__, ret, errno));
219
220         }
221
222         if (state->t->rdma.cm_event->event != state->t->rdma.expected_event) {
223                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
224                         __location__, __FUNCTION__, ret, errno));
225
226         }
227
228         switch (state->t->rdma.cm_event->event) {
229         case RDMA_CM_EVENT_ADDR_RESOLVED:
230         errno = 0;
231                 ret = rdma_resolve_route(state->t->rdma.cm_id, 5000);
232                 if (ret != 0) {
233                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
234                         __location__, __FUNCTION__, ret, errno));
235
236                 }
237                 state->t->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
238                 break;
239         case RDMA_CM_EVENT_ADDR_ERROR:
240                 break;
241         case RDMA_CM_EVENT_ROUTE_RESOLVED:
242         errno = 0;
243         ret = 0;
244                 state->t->ibv.pd = ibv_alloc_pd(state->t->rdma.cm_id->verbs);
245                 if (state->t->ibv.pd == NULL) {
246
247                 }
248
249                 state->t->ibv.comp_channel = ibv_create_comp_channel(state->t->rdma.cm_id->verbs);
250                 if (state->t->ibv.comp_channel == NULL) {
251                 }
252
253                 ZERO_STRUCT(init_attr);
254                 init_attr.cap.max_send_wr = 16;
255                 init_attr.cap.max_recv_wr = 2;
256                 init_attr.cap.max_recv_sge = 1;
257                 init_attr.cap.max_send_sge = 1;
258                 init_attr.qp_type = IBV_QPT_RC;
259
260                 state->t->ibv.cq = ibv_create_cq(state->t->rdma.cm_id->verbs,
261                                                  init_attr.cap.max_send_wr * 2,
262                                                  state->t,
263                                                  state->t->ibv.comp_channel,
264                                                  0);
265                 if (state->t->ibv.cq == NULL) {
266
267                 }
268                 init_attr.send_cq = state->t->ibv.cq;
269                 init_attr.recv_cq = state->t->ibv.cq;
270
271         errno = 0;
272                 ret = ibv_req_notify_cq(state->t->ibv.cq, 0);
273                 if (ret != 0) {
274                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
275                         __location__, __FUNCTION__, ret, errno));
276
277                 }
278
279
280         errno = 0;
281                 ret = rdma_create_qp(state->t->rdma.cm_id, state->t->ibv.pd,
282                                      &init_attr);
283                 if (ret != 0) {
284                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
285                         __location__, __FUNCTION__, ret, errno));
286
287                 }
288                 state->t->ibv.qp = state->t->rdma.cm_id->qp;
289
290                 ZERO_STRUCT(conn_param);
291                 conn_param.responder_resources = 1;
292                 conn_param.initiator_depth = 1;
293                 conn_param.retry_count = 10;
294
295                 errno = 0;
296                 ret = rdma_connect(state->t->rdma.cm_id, &conn_param);
297                 if (ret != 0) {
298                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
299                         __location__, __FUNCTION__, ret, errno));
300
301                 }
302                 state->t->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
303                 break;
304
305         case RDMA_CM_EVENT_ESTABLISHED:
306                 errno = 0;
307                 ret = 0;
308                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
309                         __location__, __FUNCTION__, ret, errno));
310
311                 state->t->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED;
312                 TALLOC_FREE(state->t->rdma.fde_channel);
313                 tevent_req_done(req);
314                 break;
315
316         case RDMA_CM_EVENT_ROUTE_ERROR:
317         case RDMA_CM_EVENT_CONNECT_REQUEST:
318         case RDMA_CM_EVENT_CONNECT_RESPONSE:
319         case RDMA_CM_EVENT_CONNECT_ERROR:
320         case RDMA_CM_EVENT_UNREACHABLE:
321         case RDMA_CM_EVENT_REJECTED:
322         case RDMA_CM_EVENT_DISCONNECTED:
323         case RDMA_CM_EVENT_DEVICE_REMOVAL:
324         case RDMA_CM_EVENT_MULTICAST_JOIN:
325         case RDMA_CM_EVENT_MULTICAST_ERROR:
326         case RDMA_CM_EVENT_ADDR_CHANGE:
327         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
328                 //error
329                 break;
330         }
331
332         if (state->t->rdma.cm_event != NULL) {
333                 rdma_ack_cm_event(state->t->rdma.cm_event);
334                 state->t->rdma.cm_event = NULL;
335         }
336 }
337
338 NTSTATUS smb_direct_rdma_connect_recv(struct tevent_req *req)
339 {
340         struct smb_direct_rdma_connect_state *state =
341                 tevent_req_data(req,
342                 struct smb_direct_rdma_connect_state);
343         NTSTATUS status;
344
345         TALLOC_FREE(state->t->rdma.fde_channel);
346
347         if (tevent_req_is_nterror(req, &status)) {
348                 tevent_req_received(req);
349                 return status;
350         }
351
352         tevent_req_received(req);
353         return NT_STATUS_OK;
354 }
355
356 struct smb_direct_negotiate_state {
357         struct smb_direct_transport *t;
358         struct {
359                 uint8_t buffer[0x14];
360                 struct ibv_mr *mr;
361                 struct ibv_sge sge;
362                 struct ibv_send_wr wr;
363         } req;
364         struct {
365                 uint8_t buffer[0x1C];
366                 struct ibv_mr *mr;
367                 struct ibv_sge sge;
368                 struct ibv_recv_wr wr;
369         } rep;
370 };
371
372 struct tevent_req *smb_direct_negotiate_send(TALLOC_CTX *mem_ctx,
373                                         struct tevent_context *ev,
374                                         struct smb_direct_transport *transport);
375 NTSTATUS smb_direct_negotiate_recv(struct tevent_req *req);
376
377 static void smb_direct_negotiate_rdma_handler(struct tevent_context *ev,
378                                               struct tevent_fd *fde,
379                                               uint16_t flags,
380                                               void *private_data);
381 static void smb_direct_negotiate_ibv_handler(struct tevent_context *ev,
382                                              struct tevent_fd *fde,
383                                              uint16_t flags,
384                                              void *private_data);
385
386 struct tevent_req *smb_direct_negotiate_send(TALLOC_CTX *mem_ctx,
387                                         struct tevent_context *ev,
388                                         struct smb_direct_transport *transport)
389 {
390         struct tevent_req *req;
391         struct smb_direct_negotiate_state *state;
392         struct ibv_recv_wr *bad_recv_wr = NULL;
393         struct ibv_send_wr *bad_send_wr = NULL;
394         int ret;
395
396         req = tevent_req_create(mem_ctx, &state,
397                                 struct smb_direct_negotiate_state);
398         if (req == NULL) {
399                 return NULL;
400         }
401         state->t = transport;
402
403         transport->ibv.fde_channel = tevent_add_fd(ev, transport,
404                                                 transport->ibv.comp_channel->fd,
405                                                 TEVENT_FD_READ,
406                                                 smb_direct_negotiate_ibv_handler,
407                                                 req);
408         if (tevent_req_nomem(transport->ibv.fde_channel, req)) {
409                 return tevent_req_post(req, ev);
410         }
411         transport->rdma.fde_channel = tevent_add_fd(ev, transport,
412                                                 transport->rdma.cm_channel->fd,
413                                                 TEVENT_FD_READ,
414                                                 smb_direct_negotiate_rdma_handler,
415                                                 req);
416         if (tevent_req_nomem(transport->rdma.fde_channel, req)) {
417                 return tevent_req_post(req, ev);
418         }
419
420         SSVAL(state->req.buffer, 0x00, 0x0100);
421         SSVAL(state->req.buffer, 0x02, 0x0100);
422         SSVAL(state->req.buffer, 0x04, 0x0000);
423         SSVAL(state->req.buffer, 0x06, 0x000A);
424         SIVAL(state->req.buffer, 0x08, 0x00000400);
425         SIVAL(state->req.buffer, 0x0C, 0x00000400);
426         SIVAL(state->req.buffer, 0x10, 0x00020000);
427
428         state->req.mr = ibv_reg_mr(transport->ibv.pd,
429                                    state->req.buffer,
430                                    sizeof(state->req.buffer),
431                                    IBV_ACCESS_LOCAL_WRITE);
432         if (tevent_req_nomem(state->req.mr, req)) {
433                 return tevent_req_post(req, ev);
434         }
435
436         state->req.sge.addr = (uint64_t) (uintptr_t) state->req.buffer;
437         state->req.sge.length = sizeof(state->req.buffer);;
438         state->req.sge.lkey = state->req.mr->lkey;
439         state->req.wr.opcode = IBV_WR_SEND;
440         state->req.wr.send_flags = IBV_SEND_SIGNALED;
441         state->req.wr.sg_list = &state->req.sge;
442         state->req.wr.num_sge = 1;
443
444         state->rep.mr = ibv_reg_mr(transport->ibv.pd,
445                                    state->rep.buffer,
446                                    sizeof(state->rep.buffer),
447                                    0);
448         if (tevent_req_nomem(state->rep.mr, req)) {
449                 return tevent_req_post(req, ev);
450         }
451
452         state->rep.sge.addr = (uint64_t) (uintptr_t) state->rep.buffer;
453         state->rep.sge.length = sizeof(state->rep.buffer);;
454         state->rep.sge.lkey = state->rep.mr->lkey;
455         state->rep.wr.sg_list = &state->rep.sge;
456         state->rep.wr.num_sge = 1;
457
458         errno = 0;
459         ret = ibv_post_recv(transport->ibv.qp, &state->rep.wr, &bad_recv_wr);
460         if (ret != 0) {
461                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
462                         __location__, __FUNCTION__, ret, errno));
463
464         }
465
466         errno = 0;
467         ret = ibv_post_send(transport->ibv.qp, &state->req.wr, &bad_send_wr);
468         if (ret != 0) {
469                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
470                         __location__, __FUNCTION__, ret, errno));
471
472         }
473
474                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
475                         __location__, __FUNCTION__, ret, errno));
476         return req;
477 }
478
479 static void smb_direct_negotiate_ibv_handler(struct tevent_context *ev,
480                                              struct tevent_fd *fde,
481                                              uint16_t flags,
482                                              void *private_data)
483 {
484         struct tevent_req *req =
485                 talloc_get_type_abort(private_data,
486                 struct tevent_req);
487         struct smb_direct_negotiate_state *state =
488                 tevent_req_data(req,
489                 struct smb_direct_negotiate_state);
490         struct ibv_cq *cq = NULL;
491         void *cq_context = NULL;
492         struct ibv_wc wc;
493         struct ibv_recv_wr *bad_wr;
494         int ret;
495
496         errno = 0;
497
498         ret = ibv_get_cq_event(state->t->ibv.comp_channel,
499                                &cq, &cq_context);
500         if (ret != 0) {
501                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
502                         __location__, __FUNCTION__, ret, errno));
503         }
504
505         if (cq != state->t->ibv.cq) {
506         }
507         if (cq_context != state->t) {
508         }
509
510         ret = ibv_req_notify_cq(state->t->ibv.cq, 0);
511         if (ret != 0) {
512                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
513                         __location__, __FUNCTION__, ret, errno));
514         }
515
516         ret = ibv_poll_cq(state->t->ibv.cq, 1, &wc);
517         if (ret != 1) {
518                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
519                         __location__, __FUNCTION__, ret, errno));
520         }
521
522         ret = 0;
523
524         if (wc.status != IBV_WC_SUCCESS) {
525         }
526
527         switch (wc.opcode) {
528         case IBV_WC_SEND:
529                 break;
530         case IBV_WC_RDMA_WRITE:
531                 break;
532         case IBV_WC_RDMA_READ:
533                 break;
534         case IBV_WC_RECV:
535                 //ret = ibv_post_recv(cb->qp, &cb->rq_wr, &bad_wr);
536                 break;
537         default:
538                 break;
539         }
540
541                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
542                         __location__, __FUNCTION__, ret, errno));
543         ibv_ack_cq_events(state->t->ibv.cq, 1);
544 }
545
546 static void smb_direct_negotiate_rdma_handler(struct tevent_context *ev,
547                                               struct tevent_fd *fde,
548                                               uint16_t flags,
549                                               void *private_data)
550 {
551         struct tevent_req *req =
552                 talloc_get_type_abort(private_data,
553                 struct tevent_req);
554         struct smb_direct_negotiate_state *state =
555                 tevent_req_data(req,
556                 struct smb_direct_negotiate_state);
557         int ret;
558
559         errno = 0;
560
561         ret = rdma_get_cm_event(state->t->rdma.cm_channel,
562                                 &state->t->rdma.cm_event);
563         if (ret != 0) {
564                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
565                         __location__, __FUNCTION__, ret, errno));
566         }
567
568         if (state->t->rdma.cm_event->status != 0) {
569                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
570                         __location__, __FUNCTION__, ret, errno));
571
572         }
573
574         if (state->t->rdma.cm_event->event != state->t->rdma.expected_event) {
575                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
576                         __location__, __FUNCTION__, ret, errno));
577
578         }
579
580         switch (state->t->rdma.cm_event->event) {
581         case RDMA_CM_EVENT_ADDR_RESOLVED:
582         case RDMA_CM_EVENT_ADDR_ERROR:
583         case RDMA_CM_EVENT_ROUTE_RESOLVED:
584         case RDMA_CM_EVENT_ESTABLISHED:
585         case RDMA_CM_EVENT_ROUTE_ERROR:
586         case RDMA_CM_EVENT_CONNECT_REQUEST:
587         case RDMA_CM_EVENT_CONNECT_RESPONSE:
588         case RDMA_CM_EVENT_CONNECT_ERROR:
589         case RDMA_CM_EVENT_UNREACHABLE:
590         case RDMA_CM_EVENT_REJECTED:
591         case RDMA_CM_EVENT_DISCONNECTED:
592         case RDMA_CM_EVENT_DEVICE_REMOVAL:
593         case RDMA_CM_EVENT_MULTICAST_JOIN:
594         case RDMA_CM_EVENT_MULTICAST_ERROR:
595         case RDMA_CM_EVENT_ADDR_CHANGE:
596         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
597                 //error
598                 break;
599         }
600
601         if (state->t->rdma.cm_event != NULL) {
602                 rdma_ack_cm_event(state->t->rdma.cm_event);
603                 state->t->rdma.cm_event = NULL;
604         }
605 }
606
607 NTSTATUS smb_direct_negotiate_recv(struct tevent_req *req)
608 {
609         struct smb_direct_negotiate_state *state =
610                 tevent_req_data(req,
611                 struct smb_direct_negotiate_state);
612         NTSTATUS status;
613
614         TALLOC_FREE(state->t->ibv.fde_channel);
615         TALLOC_FREE(state->t->rdma.fde_channel);
616
617         if (tevent_req_is_nterror(req, &status)) {
618                 tevent_req_received(req);
619                 return status;
620         }
621
622         tevent_req_received(req);
623         return NT_STATUS_OK;
624 }
625 #endif /* SMB_TRANSPORT_ENABLE_RDMA */