ddd650cc816a2c22395ff7ccd47982abc99424fe
[metze/samba/wip.git] / libcli / smb / smb_direct.c
1 /*
2    Unix SMB/CIFS implementation.
3    Infrastructure for SMB-Direct RDMA as transport
4    Copyright (C) Stefan Metzmacher 2012,2016
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "includes.h"
21 #include "system/network.h"
22 #include <tevent.h>
23 #include "lib/util/tevent_ntstatus.h"
24 #include "lib/tsocket/tsocket.h"
25 #include "lib/util/util_net.h"
26 #include "libcli/smb/smb_common.h"
27 #include "libcli/smb/smb_direct.h"
28 #include "lib/util/dlinklist.h"
29 #include "lib/util/iov_buf.h"
30
31 #ifdef SMB_TRANSPORT_ENABLE_RDMA
32 #include <rdma/rdma_cma_abi.h>
33 #include <rdma/rdma_cma.h>
34 #include <infiniband/verbs.h>
35
36 struct smb_direct_io;
37
38 struct smb_direct_connection {
39         struct tevent_context *last_ev;
40         struct {
41                 uint32_t max_send_size;
42                 uint32_t max_receive_size;
43                 uint32_t max_fragmented_size;
44                 uint32_t max_read_write_size;
45                 uint16_t send_credit_target;
46                 uint16_t send_credits;
47                 uint16_t receive_credit_max;
48                 uint16_t receive_credit_target;
49                 uint16_t receive_credits;
50                 uint32_t keep_alive_internal;
51         } state;
52         struct {
53                 int tmp_fd; /* given to the caller end */
54                 int fd;
55                 struct tevent_fd *fde;
56         } sock;
57         struct {
58                 struct rdma_cm_id *cm_id;
59                 struct rdma_event_channel *cm_channel;
60                 struct tevent_fd *fde_channel;
61                 enum rdma_cm_event_type expected_event;
62                 struct rdma_cm_event *cm_event;
63         } rdma;
64         struct {
65                 struct ibv_pd *pd;
66                 struct ibv_comp_channel *comp_channel;
67                 struct tevent_fd *fde_channel;
68                 struct ibv_cq *send_cq;
69                 struct ibv_cq *recv_cq;
70                 struct ibv_qp *qp;
71                 struct ibv_qp_init_attr init_attr;
72         } ibv;
73
74         TALLOC_CTX *io_mem_ctx;
75         struct {
76                 /*
77                  * here we have io coming into
78                  * the rdma layer, which needs to
79                  * be flushed to the socketpair
80                  */
81                 struct smb_direct_io *idle;
82                 struct smb_direct_io *posted;
83                 struct smb_direct_io *ready;
84                 struct smb_direct_io *out;
85                 uint32_t remaining_length;
86         } r2s;
87         struct {
88                 /*
89                  * here we have io coming from the socketpair
90                  * which needs to be flushed into the rdma layer.
91                  */
92                 struct smb_direct_io *idle;
93                 struct smb_direct_io *posted;
94                 struct smb_direct_io *ready;
95                 struct smb_direct_io *in;
96                 uint32_t remaining_length;
97         } s2r;
98 };
99
100 #define SMB_DIRECT_IO_MAX_DATA 8192
101
102 struct smb_direct_io {
103         struct smb_direct_io *prev, *next;
104
105         struct ibv_mr *hdr_mr;
106         struct ibv_mr *data_mr;
107         struct ibv_sge sge[2];
108
109         struct ibv_recv_wr recv_wr;
110         struct ibv_send_wr send_wr;
111
112         struct iovec _iov_array[2];
113         struct iovec *iov;
114         int iov_count;
115
116         uint32_t data_length;
117         uint32_t remaining_length;
118
119         uint8_t nbt_hdr[0x04];
120         uint8_t smbd_hdr[0x18];
121         uint8_t data[SMB_DIRECT_IO_MAX_DATA];
122 };
123
124 static int smb_direct_io_destructor(struct smb_direct_io *io);
125
126 static struct smb_direct_io *smb_direct_io_create(struct smb_direct_connection *c)
127 {
128         struct smb_direct_io *io;
129
130         if (c->io_mem_ctx == NULL) {
131                 return NULL;
132         }
133
134         io = talloc_zero(c->io_mem_ctx, struct smb_direct_io);
135         if (io == NULL) {
136                 return NULL;
137         }
138         talloc_set_destructor(io, smb_direct_io_destructor);
139
140         io->hdr_mr = ibv_reg_mr(c->ibv.pd,
141                                 io->smbd_hdr,
142                                 sizeof(io->smbd_hdr),
143                                 IBV_ACCESS_LOCAL_WRITE);
144         if (io->hdr_mr == NULL) {
145                 TALLOC_FREE(io);
146                 return NULL;
147         }
148
149         io->data_mr = ibv_reg_mr(c->ibv.pd,
150                                  io->data,
151                                  sizeof(io->data),
152                                  IBV_ACCESS_LOCAL_WRITE);
153         if (io->data_mr == NULL) {
154                 TALLOC_FREE(io);
155                 return NULL;
156         }
157
158         io->sge[0].addr = (uint64_t) (uintptr_t) io->smbd_hdr;
159         io->sge[0].length = sizeof(io->smbd_hdr);
160         io->sge[0].lkey = io->hdr_mr->lkey;
161         io->sge[1].addr = (uint64_t) (uintptr_t) io->data;
162         io->sge[1].length = sizeof(io->data);
163         io->sge[1].lkey = io->data_mr->lkey;
164
165         io->send_wr.wr_id = (uint64_t) (uintptr_t) io;
166         io->send_wr.opcode = IBV_WR_SEND;
167         io->send_wr.send_flags = IBV_SEND_SIGNALED;
168         io->send_wr.sg_list = io->sge;
169         io->send_wr.num_sge = ARRAY_SIZE(io->sge);
170
171         io->recv_wr.wr_id = (uint64_t) (uintptr_t) io;
172         io->recv_wr.sg_list = io->sge;
173         io->recv_wr.num_sge = ARRAY_SIZE(io->sge);
174
175         return io;
176 }
177
178 static int smb_direct_io_destructor(struct smb_direct_io *io)
179 {
180         if (io->hdr_mr != NULL) {
181                 ibv_dereg_mr(io->hdr_mr);
182                 io->hdr_mr = NULL;
183         }
184
185         if (io->data_mr != NULL) {
186                 ibv_dereg_mr(io->data_mr);
187                 io->data_mr = NULL;
188         }
189
190         return 0;
191 }
192
193 static int smb_direct_connection_destructor(struct smb_direct_connection *c);
194
195 struct smb_direct_connection *smb_direct_connection_create(TALLOC_CTX *mem_ctx)
196 {
197         struct smb_direct_connection *c;
198         int sfd[2];
199         int ret;
200         uint16_t i;
201
202         c = talloc_zero(mem_ctx, struct smb_direct_connection);
203         if (c == NULL) {
204                 return NULL;
205         }
206         c->sock.fd = -1;
207         c->sock.tmp_fd = -1;
208
209         talloc_set_destructor(c, smb_direct_connection_destructor);
210
211         c->state.max_send_size       = 1364;
212         c->state.max_receive_size    = SMB_DIRECT_IO_MAX_DATA;
213         c->state.max_fragmented_size = 1048576;
214         c->state.max_read_write_size = 0;
215         c->state.receive_credit_max  = 16;
216         c->state.send_credit_target  = 255;
217         c->state.keep_alive_internal = 5;
218
219         ret = socketpair(AF_UNIX, 0, SOCK_STREAM, sfd);
220         if (ret == -1) {
221                 int saved_errno = errno;
222                 TALLOC_FREE(c);
223                 errno = saved_errno;
224                 return NULL;
225         }
226         c->sock.tmp_fd = sfd[0];
227         c->sock.fd = sfd[1];
228
229         smb_set_close_on_exec(c->sock.tmp_fd);
230         smb_set_close_on_exec(c->sock.fd);
231         set_blocking(c->sock.fd, false);
232
233         c->rdma.cm_channel = rdma_create_event_channel();
234         if (c->rdma.cm_channel == NULL) {
235                 TALLOC_FREE(c);
236                 return NULL;
237         }
238         smb_set_close_on_exec(c->rdma.cm_channel->fd);
239         set_blocking(c->rdma.cm_channel->fd, false);
240
241 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
242         ret = rdma_create_id(c->rdma.cm_channel,
243                              &c->rdma.cm_id,
244                              c, RDMA_PS_TCP);
245 #else
246 #error
247         ret = rdma_create_id(c->rdma.cm_channel,
248                              &c->rdma.cm_id,
249                              c);
250 #endif
251         if (ret != 0) {
252                 TALLOC_FREE(c);
253                 return NULL;
254         }
255
256         c->ibv.pd = ibv_alloc_pd(c->rdma.cm_id->verbs);
257         if (c->ibv.pd == NULL) {
258                 TALLOC_FREE(c);
259                 return NULL;
260         }
261
262         c->ibv.comp_channel = ibv_create_comp_channel(c->rdma.cm_id->verbs);
263         if (c->ibv.comp_channel == NULL) {
264                 TALLOC_FREE(c);
265                 return NULL;
266         }
267         smb_set_close_on_exec(c->ibv.comp_channel->fd);
268         set_blocking(c->ibv.comp_channel->fd, false);
269
270         c->ibv.init_attr.cap.max_send_wr = 2;
271         c->ibv.init_attr.cap.max_recv_wr = 2;
272         c->ibv.init_attr.cap.max_recv_sge = 2;
273         c->ibv.init_attr.cap.max_send_sge = 2;
274         c->ibv.init_attr.qp_type = IBV_QPT_RC;
275         c->ibv.init_attr.sq_sig_all = 1;
276
277         c->ibv.send_cq = ibv_create_cq(c->rdma.cm_id->verbs,
278                                        c->ibv.init_attr.cap.max_send_wr,
279                                        c, c->ibv.comp_channel, 0);
280         if (c->ibv.send_cq == NULL) {
281                 TALLOC_FREE(c);
282                 return NULL;
283         }
284         c->ibv.init_attr.send_cq = c->ibv.send_cq;
285
286         c->ibv.recv_cq = ibv_create_cq(c->rdma.cm_id->verbs,
287                                        c->ibv.init_attr.cap.max_recv_wr,
288                                        c, c->ibv.comp_channel, 0);
289         if (c->ibv.recv_cq == NULL) {
290                 TALLOC_FREE(c);
291                 return NULL;
292         }
293         c->ibv.init_attr.recv_cq = c->ibv.recv_cq;
294
295         ret = ibv_req_notify_cq(c->ibv.send_cq, 0);
296         if (ret != 0) {
297                 TALLOC_FREE(c);
298                 return NULL;
299         }
300
301         ret = ibv_req_notify_cq(c->ibv.recv_cq, 0);
302         if (ret != 0) {
303                 TALLOC_FREE(c);
304                 return NULL;
305         }
306
307         ret = rdma_create_qp(c->rdma.cm_id, c->ibv.pd, &c->ibv.init_attr);
308         if (ret != 0) {
309                 TALLOC_FREE(c);
310                 return NULL;
311         }
312         c->ibv.qp = c->rdma.cm_id->qp;
313
314         c->io_mem_ctx = talloc_named_const(c, 0, "io_mem_ctx");
315         if (c->io_mem_ctx == NULL) {
316                 TALLOC_FREE(c);
317                 return NULL;
318         }
319
320         for (i = 0; i < c->state.receive_credit_max; i++) {
321                 struct smb_direct_io *io;
322
323                 io = smb_direct_io_create(c);
324                 DLIST_ADD_END(c->r2s.idle, io);
325         }
326
327         for (i = 0; i < c->state.send_credit_target; i++) {
328                 struct smb_direct_io *io;
329
330                 io = smb_direct_io_create(c);
331                 DLIST_ADD_END(c->s2r.idle, io);
332         }
333
334         return c;
335 }
336
337 static int smb_direct_connection_destructor(struct smb_direct_connection *c)
338 {
339         TALLOC_FREE(c->sock.fde);
340
341         if (c->sock.fd != -1) {
342                 close(c->sock.fd);
343                 c->sock.fd = -1;
344         }
345
346         if (c->sock.tmp_fd != -1) {
347                 close(c->sock.tmp_fd);
348                 c->sock.tmp_fd = -1;
349         }
350
351         TALLOC_FREE(c->ibv.fde_channel);
352         TALLOC_FREE(c->rdma.fde_channel);
353
354         TALLOC_FREE(c->io_mem_ctx);
355         ZERO_STRUCT(c->r2s);
356         ZERO_STRUCT(c->s2r);
357
358         if (c->rdma.cm_event != NULL) {
359                 rdma_ack_cm_event(c->rdma.cm_event);
360                 c->rdma.cm_event = NULL;
361         }
362
363         if (c->ibv.qp != NULL) {
364                 ibv_destroy_qp(c->ibv.qp);
365                 c->ibv.qp = NULL;
366         }
367
368         if (c->ibv.send_cq != NULL) {
369                 ibv_destroy_cq(c->ibv.send_cq);
370                 c->ibv.send_cq = NULL;
371         }
372
373         if (c->ibv.recv_cq != NULL) {
374                 ibv_destroy_cq(c->ibv.recv_cq);
375                 c->ibv.recv_cq = NULL;
376         }
377
378         if (c->ibv.comp_channel != NULL) {
379                 ibv_destroy_comp_channel(c->ibv.comp_channel);
380                 c->ibv.comp_channel = NULL;
381         }
382
383         if (c->ibv.pd != NULL) {
384                 ibv_dealloc_pd(c->ibv.pd);
385                 c->ibv.pd = NULL;
386         }
387
388         if (c->rdma.cm_id != NULL) {
389                 rdma_destroy_id(c->rdma.cm_id);
390                 c->rdma.cm_id = NULL;
391         }
392
393         if (c->rdma.cm_channel != NULL) {
394                 rdma_destroy_event_channel(c->rdma.cm_channel);
395                 c->rdma.cm_channel = NULL;
396         }
397
398         return 0;
399 }
400
401 static int smb_direct_connection_post_recv(struct smb_direct_connection *c)
402 {
403         struct smb_direct_io *io = NULL;
404         struct ibv_recv_wr *bad_recv_wr = NULL;
405         int ret;
406
407         if (c->r2s.idle == NULL) {
408                 return 0;
409         }
410
411         for (io = c->r2s.idle; io != NULL; io = io->next) {
412                 if (io->next == NULL) {
413                         io->recv_wr.next = NULL;
414                         break;
415                 }
416
417                 io->recv_wr.next = &io->next->recv_wr;
418         }
419
420         errno = 0;
421         ret = ibv_post_recv(c->ibv.qp, &c->r2s.idle->recv_wr, &bad_recv_wr);
422         if (ret != 0) {
423                 NTSTATUS status;
424                 status = map_nt_error_from_unix_common(errno);
425                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
426                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
427                 return ret;
428         }
429
430         DLIST_CONCATENATE(c->r2s.posted, c->r2s.idle);
431         c->r2s.idle = NULL;
432
433         return 0;
434 }
435
436 static int smb_direct_connection_post_send(struct smb_direct_connection *c)
437 {
438         struct smb_direct_io *io = NULL;
439         struct smb_direct_io *next = NULL;
440         struct smb_direct_io *posted = NULL;
441         struct smb_direct_io *last = NULL;
442         struct ibv_send_wr *bad_send_wr = NULL;
443         int ret;
444
445         if (c->s2r.ready == NULL) {
446                 return 0;
447         }
448
449         if (c->state.send_credits == 0) {
450                 return 0;
451         }
452
453         if (1) {
454                 return 0;
455         }
456
457         for (io = c->s2r.ready; io != NULL; io = next) {
458                 uint16_t granted = 0;
459                 uint16_t flags = 0;
460                 uint32_t data_offset = 0;
461
462                 next = io->next;
463
464                 if (c->state.send_credits == 0) {
465                         break;
466                 }
467
468                 c->state.send_credits -= 1;
469
470                 if (c->state.send_credits == 0) {
471                         flags |= 0x0001;
472                 }
473
474                 granted = c->state.receive_credit_max;
475                 granted -= c->state.receive_credits;
476                 granted = MIN(granted, c->state.receive_credit_target);
477                 c->state.receive_credits += granted;
478
479                 if (io->data_length > 0) {
480                         data_offset = 0x18;
481                         io->sge[0].length = data_offset;
482                         io->sge[1].length = io->data_length;
483                         io->send_wr.num_sge = 2;
484                 } else {
485                         io->sge[0].length = 0x14;
486                         io->send_wr.num_sge = 1;
487                 }
488
489                 SSVAL(io->smbd_hdr, 0x00, c->state.send_credit_target);
490                 SSVAL(io->smbd_hdr, 0x02, granted);
491                 SSVAL(io->smbd_hdr, 0x04, flags);
492                 SSVAL(io->smbd_hdr, 0x06, 0x0000);
493                 SIVAL(io->smbd_hdr, 0x08, io->remaining_length);
494                 SIVAL(io->smbd_hdr, 0x0C, data_offset);
495                 SIVAL(io->smbd_hdr, 0x10, io->data_length);
496                 SIVAL(io->smbd_hdr, 0x14, 0x00000000);
497
498                 if (next == NULL) {
499                         io->send_wr.next = &next->send_wr;
500                 } else {
501                         io->send_wr.next = NULL;
502                 }
503                 DLIST_REMOVE(c->s2r.ready, io);
504                 DLIST_ADD_END(posted, io);
505         }
506
507         last = DLIST_TAIL(posted);
508         last->send_wr.next = NULL;
509
510         errno = 0;
511         ret = ibv_post_send(c->ibv.qp, &c->s2r.ready->send_wr, &bad_send_wr);
512         if (ret != 0) {
513                 NTSTATUS status;
514         //      DLIST_CONCATENATE(c->s2r.ready, posted); // TODO: check bad_send_wr
515                 status = map_nt_error_from_unix_common(errno);
516                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
517                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
518                 return ret;
519         }
520
521         DLIST_CONCATENATE(c->s2r.posted, posted);
522
523         return 0;
524 }
525
526 static int smb_direct_connection_setup_readv(struct smb_direct_connection *c)
527 {
528         TEVENT_FD_READABLE(c->sock.fde);
529         // TODO: immediate_event?? may skips a syscall.
530         return 0;
531 }
532
533 static int smb_direct_connection_setup_writev(struct smb_direct_connection *c)
534 {
535         TEVENT_FD_WRITEABLE(c->sock.fde);
536         // TODO: immediate_event?? may skips a syscall.
537         return 0;
538 }
539
/* Request state of smb_direct_connection_rdma_connect_send() */
struct smb_direct_connection_rdma_connect_state {
        /* the connection being connected */
        struct smb_direct_connection *c;
};
543
544 static int smb_direct_connection_rdma_connect_state_destructor(
545                struct smb_direct_connection_rdma_connect_state *state)
546 {
547         struct smb_direct_connection *c = state->c;
548
549         TALLOC_FREE(c->rdma.fde_channel);
550
551         return 0;
552 }
553
554 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
555                                             struct tevent_fd *fde,
556                                             uint16_t flags,
557                                             void *private_data);
558
559 static struct tevent_req *smb_direct_connection_rdma_connect_send(TALLOC_CTX *mem_ctx,
560                                         struct tevent_context *ev,
561                                         struct smb_direct_connection *c,
562                                         const struct sockaddr_storage *src,
563                                         const struct sockaddr_storage *dst,
564                                         struct tsocket_address *local_addr,
565                                         struct tsocket_address *remote_addr)
566 {
567         struct tevent_req *req;
568         struct smb_direct_connection_rdma_connect_state *state;
569         int ret;
570         //struct sockaddr *src_addr = (const struct sockaddr *)src;
571         struct sockaddr *src_addr = NULL;
572         struct sockaddr_storage _dst_addr = *dst;
573         struct sockaddr *dst_addr = (struct sockaddr *)&_dst_addr;
574
575         set_sockaddr_port(dst_addr, 5445);
576
577         req = tevent_req_create(mem_ctx, &state,
578                                 struct smb_direct_connection_rdma_connect_state);
579         if (req == NULL) {
580                 return NULL;
581         }
582         state->c = c;
583
584         talloc_set_destructor(state, smb_direct_connection_rdma_connect_state_destructor);
585
586         c->rdma.fde_channel = tevent_add_fd(ev, c,
587                                         c->rdma.cm_channel->fd,
588                                         TEVENT_FD_READ,
589                                         smb_direct_connection_rdma_connect_handler,
590                                         req);
591         if (tevent_req_nomem(c->rdma.fde_channel, req)) {
592                 return tevent_req_post(req, ev);
593         }
594
595         errno = 0;
596         ret = rdma_resolve_addr(c->rdma.cm_id,
597                                 src_addr, dst_addr,
598                                 5000);
599         if (ret != 0) {
600                 NTSTATUS status = map_nt_error_from_unix_common(errno);
601                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
602                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
603                 tevent_req_nterror(req, status);
604                 return tevent_req_post(req, ev);
605         }
606         c->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
607
608         return req;
609 }
610
611 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
612                                             struct tevent_fd *fde,
613                                             uint16_t flags,
614                                             void *private_data)
615 {
616         struct tevent_req *req =
617                 talloc_get_type_abort(private_data,
618                 struct tevent_req);
619         struct smb_direct_connection_rdma_connect_state *state =
620                 tevent_req_data(req,
621                 struct smb_direct_connection_rdma_connect_state);
622         struct smb_direct_connection *c = state->c;
623         struct rdma_conn_param conn_param;
624         uint8_t ird_ord_hdr[8];
625         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
626         int ret;
627
628         errno = 0;
629
630         ret = rdma_get_cm_event(c->rdma.cm_channel,
631                                 &c->rdma.cm_event);
632         if (ret != 0) {
633                 status = map_nt_error_from_unix_common(errno);
634                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
635                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
636                 tevent_req_nterror(req, status);
637                 return;
638         }
639
640         errno = 0;
641         if (c->rdma.cm_event->status != 0) {
642                 errno = c->rdma.cm_event->status;
643                 status = map_nt_error_from_unix_common(errno);
644                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
645                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
646                 tevent_req_nterror(req, status);
647                 return;
648         }
649
650         if (c->rdma.cm_event->event != c->rdma.expected_event) {
651                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
652                         __location__, __FUNCTION__, ret, errno));
653
654         }
655
656         switch (c->rdma.cm_event->event) {
657         case RDMA_CM_EVENT_ADDR_RESOLVED:
658         errno = 0;
659                 ret = rdma_resolve_route(c->rdma.cm_id, 5000);
660                 if (ret != 0) {
661                         status = map_nt_error_from_unix_common(errno);
662                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
663                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
664                         tevent_req_nterror(req, status);
665                         return;
666                 }
667                 c->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
668                 break;
669         case RDMA_CM_EVENT_ROUTE_RESOLVED:
670         errno = 0;
671         ret = 0;
672 #if 0
673                 c->ibv.pd = ibv_alloc_pd(c->rdma.cm_id->verbs);
674                 if (c->ibv.pd == NULL) {
675                         status = map_nt_error_from_unix_common(errno);
676                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
677                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
678                         tevent_req_nterror(req, status);
679                         return;
680                 }
681
682                 c->ibv.comp_channel = ibv_create_comp_channel(c->rdma.cm_id->verbs);
683                 if (c->ibv.comp_channel == NULL) {
684                         status = map_nt_error_from_unix_common(errno);
685                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
686                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
687                         tevent_req_nterror(req, status);
688                         return;
689                 }
690
691                 set_blocking(c->ibv.comp_channel->fd, false);
692                 smb_set_close_on_exec(c->ibv.comp_channel->fd);
693
694                 ZERO_STRUCT(init_attr);
695                 init_attr.cap.max_send_wr = 2;
696                 init_attr.cap.max_recv_wr = 2;
697                 init_attr.cap.max_recv_sge = 2;
698                 init_attr.cap.max_send_sge = 2;
699                 init_attr.qp_type = IBV_QPT_RC;
700                 init_attr.sq_sig_all = 1;
701
702                 c->ibv.send_cq = ibv_create_cq(c->rdma.cm_id->verbs,
703                                                       init_attr.cap.max_send_wr,
704                                                       c,
705                                                       c->ibv.comp_channel,
706                                                       0);
707                 if (c->ibv.send_cq == NULL) {
708                         status = map_nt_error_from_unix_common(errno);
709                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
710                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
711                         tevent_req_nterror(req, status);
712                         return;
713                 }
714                 init_attr.send_cq = c->ibv.send_cq;
715                 c->ibv.recv_cq = ibv_create_cq(c->rdma.cm_id->verbs,
716                                                       init_attr.cap.max_recv_wr,
717                                                       c,
718                                                       c->ibv.comp_channel,
719                                                       0);
720                 if (c->ibv.recv_cq == NULL) {
721                         status = map_nt_error_from_unix_common(errno);
722                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
723                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
724                         tevent_req_nterror(req, status);
725                         return;
726                 }
727                 init_attr.recv_cq = c->ibv.recv_cq;
728
729                 errno = 0;
730                 ret = ibv_req_notify_cq(c->ibv.send_cq, 0);
731                 if (ret != 0) {
732                         status = map_nt_error_from_unix_common(errno);
733                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
734                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
735                         tevent_req_nterror(req, status);
736                         return;
737                 }
738
739                 errno = 0;
740                 ret = ibv_req_notify_cq(c->ibv.recv_cq, 0);
741                 if (ret != 0) {
742                         status = map_nt_error_from_unix_common(errno);
743                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
744                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
745                         tevent_req_nterror(req, status);
746                         return;
747                 }
748
749                 errno = 0;
750                 ret = rdma_create_qp(c->rdma.cm_id, c->ibv.pd,
751                                      &init_attr);
752                 if (ret != 0) {
753                         status = map_nt_error_from_unix_common(errno);
754                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
755                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
756                         tevent_req_nterror(req, status);
757                         return;
758                 }
759                 c->ibv.qp = c->rdma.cm_id->qp;
760 #endif
761                 RSIVAL(ird_ord_hdr, 0, 16);
762                 RSIVAL(ird_ord_hdr, 4, 0);
763
764                 ZERO_STRUCT(conn_param);
765                 conn_param.private_data = ird_ord_hdr;
766                 conn_param.private_data_len = sizeof(ird_ord_hdr);
767                 conn_param.responder_resources = 1;
768                 conn_param.initiator_depth = 1;
769                 conn_param.retry_count = 10;
770
771                 errno = 0;
772                 ret = rdma_connect(c->rdma.cm_id, &conn_param);
773                 if (ret != 0) {
774                         status = map_nt_error_from_unix_common(errno);
775                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
776                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
777                         tevent_req_nterror(req, status);
778                         return;
779                 }
780                 c->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
781                 break;
782
783         case RDMA_CM_EVENT_ESTABLISHED:
784                 errno = 0;
785                 ret = 0;
786                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
787                         __location__, __FUNCTION__, ret, errno));
788
789                 c->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED;
790                 TALLOC_FREE(c->rdma.fde_channel);
791                 rdma_ack_cm_event(c->rdma.cm_event);
792                 c->rdma.cm_event = NULL;
793                 tevent_req_done(req);
794                 return;
795
796         case RDMA_CM_EVENT_ADDR_ERROR:
797         case RDMA_CM_EVENT_ROUTE_ERROR:
798         case RDMA_CM_EVENT_CONNECT_REQUEST:
799         case RDMA_CM_EVENT_CONNECT_RESPONSE:
800         case RDMA_CM_EVENT_CONNECT_ERROR:
801         case RDMA_CM_EVENT_UNREACHABLE:
802         case RDMA_CM_EVENT_REJECTED:
803         case RDMA_CM_EVENT_DISCONNECTED:
804         case RDMA_CM_EVENT_DEVICE_REMOVAL:
805         case RDMA_CM_EVENT_MULTICAST_JOIN:
806         case RDMA_CM_EVENT_MULTICAST_ERROR:
807         case RDMA_CM_EVENT_ADDR_CHANGE:
808         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
809                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
810                 DEBUG(0,("%s:%s: event[%d] ret[%d] errno[%d] status[%s]\n",
811                         __location__, __FUNCTION__,
812                         c->rdma.cm_event->event, ret, errno, nt_errstr(status)));
813                 tevent_req_nterror(req, status);
814                 return;
815         }
816
817         rdma_ack_cm_event(c->rdma.cm_event);
818         c->rdma.cm_event = NULL;
819 }
820
821 static NTSTATUS smb_direct_connection_rdma_connect_recv(struct tevent_req *req)
822 {
823         return tevent_req_simple_recv_ntstatus(req);
824 }
825
/*
 * Private state of a negotiate request.
 *
 * Only the connection is tracked here; the buffers and work requests
 * used for the exchange are smb_direct_io objects created by the send
 * function.
 */
struct smb_direct_connection_negotiate_connect_state {
	/* connection the negotiate exchange runs on */
	struct smb_direct_connection *c;
};
847
848 static int smb_direct_connection_negotiate_connect_destructor(
849                struct smb_direct_connection_negotiate_connect_state *state)
850 {
851         struct smb_direct_connection *c = state->c;
852
853         TALLOC_FREE(c->ibv.fde_channel);
854         TALLOC_FREE(c->rdma.fde_channel);
855
856 //      if (state->req.mr != NULL) {
857 //              ibv_dereg_mr(state->req.mr);
858 //              state->req.mr = NULL;
859 //      }
860
861         return 0;
862 }
863
864 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
865                                               struct tevent_fd *fde,
866                                               uint16_t flags,
867                                               void *private_data);
868 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
869                                              struct tevent_fd *fde,
870                                              uint16_t flags,
871                                              void *private_data);
872
873 static struct tevent_req *smb_direct_connection_negotiate_connect_send(TALLOC_CTX *mem_ctx,
874                                                      struct tevent_context *ev,
875                                                      struct smb_direct_connection *c)
876 {
877         struct tevent_req *req;
878         struct smb_direct_connection_negotiate_connect_state *state;
879         struct smb_direct_io *rdma_read = NULL;
880         struct smb_direct_io *neg_send = NULL;
881         struct smb_direct_io *neg_recv = NULL;
882         struct ibv_recv_wr *bad_recv_wr = NULL;
883         struct ibv_send_wr *bad_send_wr = NULL;
884         NTSTATUS status;
885         int ret;
886
887         req = tevent_req_create(mem_ctx, &state,
888                                 struct smb_direct_connection_negotiate_connect_state);
889         if (req == NULL) {
890                 return NULL;
891         }
892         state->c = c;
893
894         // TODO: cleanup
895         talloc_set_destructor(state, smb_direct_connection_negotiate_connect_destructor);
896
897         c->rdma.fde_channel = tevent_add_fd(ev, c, c->rdma.cm_channel->fd,
898                                             TEVENT_FD_READ,
899                                             smb_direct_connection_negotiate_connect_rdma_handler,
900                                             req);
901         if (tevent_req_nomem(c->rdma.fde_channel, req)) {
902                 return tevent_req_post(req, ev);
903         }
904         c->ibv.fde_channel = tevent_add_fd(ev, c, c->ibv.comp_channel->fd,
905                                            TEVENT_FD_READ,
906                                            smb_direct_connection_negotiate_connect_ibv_handler,
907                                            req);
908         if (tevent_req_nomem(c->ibv.fde_channel, req)) {
909                 return tevent_req_post(req, ev);
910         }
911
912         neg_recv = smb_direct_io_create(c);
913         if (tevent_req_nomem(neg_recv, req)) {
914                 return tevent_req_post(req, ev);
915         }
916         neg_recv->sge[0].addr = (uint64_t) (uintptr_t) neg_recv->data;
917         neg_recv->sge[0].length = sizeof(neg_recv->data);
918         neg_recv->sge[0].lkey = neg_recv->data_mr->lkey;
919         neg_recv->recv_wr.sg_list = neg_recv->sge;
920         neg_recv->recv_wr.num_sge = 1;
921
922         rdma_read = smb_direct_io_create(c);
923         if (tevent_req_nomem(rdma_read, req)) {
924                 return tevent_req_post(req, ev);
925         }
926         rdma_read->sge[0].addr = 1;
927         rdma_read->sge[0].length = 0;
928         rdma_read->sge[0].lkey = 1;
929         rdma_read->send_wr.opcode = IBV_WR_RDMA_READ;
930         rdma_read->send_wr.send_flags = IBV_SEND_SIGNALED;
931         rdma_read->send_wr.sg_list = rdma_read->sge;
932         rdma_read->send_wr.num_sge = 1;
933         rdma_read->send_wr.wr.rdma.rkey = 1;
934         rdma_read->send_wr.wr.rdma.remote_addr = 1;
935
936         neg_send = smb_direct_io_create(c);
937         if (tevent_req_nomem(neg_send, req)) {
938                 return tevent_req_post(req, ev);
939         }
940         SSVAL(neg_send->data, 0x00, 0x0100);
941         SSVAL(neg_send->data, 0x02, 0x0100);
942         SSVAL(neg_send->data, 0x04, 0x0000);
943         SSVAL(neg_send->data, 0x06, c->state.send_credit_target);
944         SIVAL(neg_send->data, 0x08, c->state.max_send_size);
945         SIVAL(neg_send->data, 0x0C, c->state.max_receive_size);
946         SIVAL(neg_send->data, 0x10, c->state.max_fragmented_size);
947         neg_send->sge[0].addr = (uint64_t) (uintptr_t) neg_send->data;
948         neg_send->sge[0].length = 0x14;
949         neg_send->sge[0].lkey = neg_send->data_mr->lkey;
950         neg_send->send_wr.opcode = IBV_WR_SEND;
951         neg_send->send_wr.send_flags = IBV_SEND_SIGNALED;
952         neg_send->send_wr.sg_list = neg_send->sge;
953         neg_send->send_wr.num_sge = 1;
954
955         errno = 0;
956         ret = ibv_post_recv(c->ibv.qp, &neg_recv->recv_wr, &bad_recv_wr);
957         if (ret != 0) {
958                 status = map_nt_error_from_unix_common(errno);
959                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
960                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
961                 tevent_req_nterror(req, status);
962                 return tevent_req_post(req, ev);
963         }
964
965         rdma_read->send_wr.next = &neg_send->send_wr;
966         errno = 0;
967         ret = ibv_post_send(c->ibv.qp, &rdma_read->send_wr, &bad_send_wr);
968         if (ret != 0) {
969                 status = map_nt_error_from_unix_common(errno);
970                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
971                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
972                 tevent_req_nterror(req, status);
973                 return tevent_req_post(req, ev);
974         }
975
976                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
977                         __location__, __FUNCTION__, ret, errno));
978         return req;
979 }
980
981 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
982                                               struct tevent_fd *fde,
983                                               uint16_t flags,
984                                               void *private_data)
985 {
986         struct tevent_req *req =
987                 talloc_get_type_abort(private_data,
988                 struct tevent_req);
989         struct smb_direct_connection_negotiate_connect_state *state =
990                 tevent_req_data(req,
991                 struct smb_direct_connection_negotiate_connect_state);
992         struct smb_direct_connection *c = state->c;
993         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
994         int ret;
995
996         errno = 0;
997
998         ret = rdma_get_cm_event(c->rdma.cm_channel,
999                                 &c->rdma.cm_event);
1000         if (ret != 0) {
1001                 status = map_nt_error_from_unix_common(errno);
1002                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1003                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1004                 tevent_req_nterror(req, status);
1005                 return;
1006         }
1007
1008         if (c->rdma.cm_event->status != 0) {
1009                 errno = c->rdma.cm_event->status;
1010                 status = map_nt_error_from_unix_common(errno);
1011                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1012                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1013                 tevent_req_nterror(req, status);
1014                 return;
1015         }
1016
1017         switch (c->rdma.cm_event->event) {
1018         case RDMA_CM_EVENT_DISCONNECTED:
1019                 status = NT_STATUS_CONNECTION_DISCONNECTED;
1020                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1021                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1022                 tevent_req_nterror(req, status);
1023                 return;
1024         case RDMA_CM_EVENT_ADDR_RESOLVED:
1025         case RDMA_CM_EVENT_ADDR_ERROR:
1026         case RDMA_CM_EVENT_ROUTE_RESOLVED:
1027         case RDMA_CM_EVENT_ESTABLISHED:
1028         case RDMA_CM_EVENT_ROUTE_ERROR:
1029         case RDMA_CM_EVENT_CONNECT_REQUEST:
1030         case RDMA_CM_EVENT_CONNECT_RESPONSE:
1031         case RDMA_CM_EVENT_CONNECT_ERROR:
1032         case RDMA_CM_EVENT_UNREACHABLE:
1033         case RDMA_CM_EVENT_REJECTED:
1034         case RDMA_CM_EVENT_DEVICE_REMOVAL:
1035         case RDMA_CM_EVENT_MULTICAST_JOIN:
1036         case RDMA_CM_EVENT_MULTICAST_ERROR:
1037         case RDMA_CM_EVENT_ADDR_CHANGE:
1038         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1039                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1040                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1041                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1042                 tevent_req_nterror(req, status);
1043                 return;
1044         }
1045
1046         status = NT_STATUS_INTERNAL_ERROR;
1047         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1048                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1049         tevent_req_nterror(req, status);
1050 }
1051
1052 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
1053                                              struct tevent_fd *fde,
1054                                              uint16_t flags,
1055                                              void *private_data)
1056 {
1057         struct tevent_req *req =
1058                 talloc_get_type_abort(private_data,
1059                 struct tevent_req);
1060         struct smb_direct_connection_negotiate_connect_state *state =
1061                 tevent_req_data(req,
1062                 struct smb_direct_connection_negotiate_connect_state);
1063         struct smb_direct_connection *c = state->c;
1064         struct ibv_cq *cq = NULL;
1065         void *cq_context = NULL;
1066         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1067         struct ibv_wc wc;
1068         int ret;
1069         uint16_t credits_requested;
1070         uint16_t credits_granted;
1071         uint32_t max_read_write_size;
1072         uint32_t preferred_send_size;
1073         uint32_t max_receive_size;
1074         uint32_t max_fragmented_size;
1075         uint32_t tmp;
1076         struct smb_direct_io *io = NULL;
1077
1078         errno = 0;
1079         ret = ibv_get_cq_event(c->ibv.comp_channel,
1080                                &cq, &cq_context);
1081         if (ret != 0) {
1082                 status = map_nt_error_from_unix_common(errno);
1083                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1084                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1085                 tevent_req_nterror(req, status);
1086                 return;
1087         }
1088
1089         ibv_ack_cq_events(cq, 1);
1090
1091         if (cq_context != c) {
1092                 status = NT_STATUS_INTERNAL_ERROR;;
1093                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1094                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1095                 tevent_req_nterror(req, status);
1096                 return;
1097         }
1098
1099         errno = 0;
1100         ret = ibv_req_notify_cq(cq, 0);
1101         if (ret != 0) {
1102                 status = map_nt_error_from_unix_common(errno);
1103                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1104                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1105                 tevent_req_nterror(req, status);
1106                 return;
1107         }
1108
1109         errno = 0;
1110         ZERO_STRUCT(wc);
1111         ret = ibv_poll_cq(cq, 1, &wc);
1112         if (ret != 1) {
1113                 status = map_nt_error_from_unix_common(errno);
1114                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1115                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1116                 tevent_req_nterror(req, status);
1117                 return;
1118         }
1119         ret = 0;
1120
1121         if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1122                 //errno = wc.status;
1123                 status = map_nt_error_from_unix_common(wc.status);//errno);
1124                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1125                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1126                 TALLOC_FREE(c->ibv.fde_channel);
1127                 TALLOC_FREE(c->rdma.fde_channel);
1128                 smb_direct_connection_negotiate_connect_rdma_handler(ev, fde, flags, private_data);
1129                 return;
1130         }
1131         if (wc.status != IBV_WC_SUCCESS) {
1132                 errno = wc.status;
1133                 status = map_nt_error_from_unix_common(errno);
1134                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1135                         __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1136                         ibv_wc_status_str(wc.status)));
1137                 tevent_req_nterror(req, status);
1138                 return;
1139         }
1140
1141         io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1142                                    struct smb_direct_io);
1143
1144         switch (wc.opcode) {
1145         case IBV_WC_SEND:
1146                 DEBUG(0,("%s:%s: GOT SEND[%p] ret[%d] errno[%d]\n",
1147                         __location__, __FUNCTION__, io, ret, errno));
1148                 TALLOC_FREE(io);
1149                 break;
1150         case IBV_WC_RDMA_READ:
1151                 DEBUG(0,("%s:%s: GOT RDMA_READ[%p] ret[%d] errno[%d]\n",
1152                         __location__, __FUNCTION__, io, ret, errno));
1153                 TALLOC_FREE(io);
1154                 break;
1155         case IBV_WC_RECV:
1156                 DEBUG(0,("%s:%s: GOT RECV[%p] ret[%d] errno[%d]\n",
1157                         __location__, __FUNCTION__, io, ret, errno));
1158                 dump_data(0, io->data, wc.byte_len);
1159                 if (wc.byte_len < 0x20) {
1160                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1161                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1162                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1163                         tevent_req_nterror(req, status);
1164                         return;
1165                 }
1166                 if (SVAL(io->data, 0x00) != 0x0100) {
1167                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1168                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1169                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1170                         tevent_req_nterror(req, status);
1171                         return;
1172                 }
1173                 if (SVAL(io->data, 0x02) != 0x0100) {
1174                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1175                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1176                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1177                         tevent_req_nterror(req, status);
1178                         return;
1179                 }
1180                 if (SVAL(io->data, 0x04) != 0x0100) {
1181                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1182                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1183                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1184                         tevent_req_nterror(req, status);
1185                         return;
1186                 }
1187                 credits_requested = SVAL(io->data, 0x08);
1188                 if (credits_requested == 0) {
1189                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1190                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1191                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1192                         tevent_req_nterror(req, status);
1193                         return;
1194                 }
1195                 credits_granted = SVAL(io->data, 0x0A);
1196                 if (credits_granted == 0) {
1197                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1198                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1199                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1200                         tevent_req_nterror(req, status);
1201                         return;
1202                 }
1203                 status = NT_STATUS(IVAL(io->data, 0x0C));
1204                 if (!NT_STATUS_IS_OK(status)) {
1205                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1206                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1207                         tevent_req_nterror(req, status);
1208                         return;
1209                 }
1210                 max_read_write_size = IVAL(io->data, 0x10);
1211                 preferred_send_size = IVAL(io->data, 0x14);
1212                 if (preferred_send_size > c->state.max_receive_size) {
1213                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1214                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1215                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1216                         tevent_req_nterror(req, status);
1217                         return;
1218                 }
1219                 max_receive_size = IVAL(io->data, 0x18);
1220                 if (max_receive_size < 0x80) {
1221                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1222                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1223                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1224                         tevent_req_nterror(req, status);
1225                         return;
1226                 }
1227                 max_fragmented_size = IVAL(io->data, 0x1C);
1228                 if (max_fragmented_size < 0x20000) {
1229                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1230                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1231                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1232                         tevent_req_nterror(req, status);
1233                         return;
1234                 }
1235
1236                 c->state.receive_credit_target = credits_requested;
1237
1238                 tmp = c->state.max_receive_size;
1239                 tmp = MIN(tmp, preferred_send_size);
1240                 tmp = MAX(tmp, 128);
1241                 c->state.max_receive_size = tmp;
1242
1243                 tmp = c->state.max_send_size;
1244                 tmp = MIN(tmp, max_receive_size);
1245                 c->state.max_send_size = tmp;
1246
1247                 tmp = MIN(1048576, max_read_write_size);
1248                 c->state.max_read_write_size = tmp;
1249
1250                 tmp = c->state.max_fragmented_size;
1251                 tmp = MIN(tmp, max_fragmented_size);
1252                 c->state.max_fragmented_size = tmp;
1253
1254                 c->state.send_credits = credits_granted;
1255
1256                 TALLOC_FREE(c->ibv.fde_channel);
1257                 TALLOC_FREE(c->rdma.fde_channel);
1258
1259                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1260                         __location__, __FUNCTION__, ret, errno));
1261
1262                 TALLOC_FREE(io);
1263
1264                 errno = 0;
1265                 ret = smb_direct_connection_post_recv(c);
1266                 if (ret != 0) {
1267                         status = map_nt_error_from_unix_common(errno);
1268                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1269                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1270                         tevent_req_nterror(req, status);
1271                         return;
1272                 }
1273
1274                 tevent_req_done(req);
1275                 return;
1276
1277         case IBV_WC_RDMA_WRITE:
1278         default:
1279                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1280                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1281                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1282                 tevent_req_nterror(req, status);
1283                 return;
1284         }
1285 }
1286
1287 static NTSTATUS smb_direct_connection_negotiate_connect_recv(struct tevent_req *req)
1288 {
1289         return tevent_req_simple_recv_ntstatus(req);
1290 }
1291
/*
 * State of the combined connect request: the RDMA connect followed by
 * the SMB-Direct negotiate.
 */
struct smb_direct_connection_connect_state {
	/* event context, needed again to start the negotiate step */
	struct tevent_context *ev;
	/* the connection that gets connected */
	struct smb_direct_connection *c;
};

/* completion callbacks of the two steps, in the order they run */
static void smb_direct_connection_connect_done_rdma(
					struct tevent_req *subreq);
static void smb_direct_connection_connect_done_negotiate(
					struct tevent_req *subreq);
1299
1300 struct tevent_req *smb_direct_connection_connect_send(TALLOC_CTX *mem_ctx,
1301                                                       struct tevent_context *ev,
1302                                                       struct smb_direct_connection *c,
1303                                                       const struct sockaddr_storage *src,
1304                                                       const struct sockaddr_storage *dst)
1305 {
1306         struct tevent_req *req = NULL;
1307         struct smb_direct_connection_connect_state *state = NULL;
1308         struct tevent_req *subreq = NULL;
1309
1310         req = tevent_req_create(mem_ctx, &state,
1311                                 struct smb_direct_connection_connect_state);
1312         if (req == NULL) {
1313                 return NULL;
1314         }
1315         state->ev = ev;
1316         state->c = c;
1317
1318         subreq = smb_direct_connection_rdma_connect_send(state, ev, c, src, dst, NULL, NULL);
1319         if (tevent_req_nomem(subreq, req)) {
1320                 return tevent_req_post(req, ev);
1321         }
1322         tevent_req_set_callback(subreq,
1323                                 smb_direct_connection_connect_done_rdma,
1324                                 req);
1325
1326         return req;
1327 }
1328
1329 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq)
1330 {
1331         struct tevent_req *req =
1332                 tevent_req_callback_data(subreq,
1333                 struct tevent_req);
1334         struct smb_direct_connection_connect_state *state =
1335                 tevent_req_data(req,
1336                 struct smb_direct_connection_connect_state);
1337         NTSTATUS status;
1338
1339         status = smb_direct_connection_rdma_connect_recv(subreq);
1340         TALLOC_FREE(subreq);
1341         if (tevent_req_nterror(req, status)) {
1342                 return;
1343         }
1344
1345         subreq = smb_direct_connection_negotiate_connect_send(state, state->ev, state->c);
1346         if (tevent_req_nomem(subreq, req)) {
1347                 return;
1348         }
1349         tevent_req_set_callback(subreq,
1350                                 smb_direct_connection_connect_done_negotiate,
1351                                 req);
1352 }
1353
1354 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq)
1355 {
1356         struct tevent_req *req =
1357                 tevent_req_callback_data(subreq,
1358                 struct tevent_req);
1359         NTSTATUS status;
1360
1361         status = smb_direct_connection_negotiate_connect_recv(subreq);
1362         TALLOC_FREE(subreq);
1363         if (tevent_req_nterror(req, status)) {
1364                 return;
1365         }
1366
1367         tevent_req_done(req);
1368 }
1369
1370 NTSTATUS smb_direct_connection_connect_recv(struct tevent_req *req, int *fd)
1371 {
1372         struct smb_direct_connection_connect_state *state =
1373                 tevent_req_data(req,
1374                 struct smb_direct_connection_connect_state);
1375         struct smb_direct_connection *c = state->c;
1376         NTSTATUS status;
1377
1378         *fd = -1;
1379
1380         if (tevent_req_is_nterror(req, &status)) {
1381                 tevent_req_received(req);
1382                 return status;
1383         }
1384
1385         *fd = c->sock.tmp_fd;
1386         c->sock.tmp_fd = -1;
1387         tevent_req_received(req);
1388         return NT_STATUS_OK;
1389 }
1390
1391 static void smb_direct_connection_disconnect(struct smb_direct_connection *c,
1392                                                  NTSTATUS status)
1393 {
1394         if (NT_STATUS_IS_OK(status)) {
1395                 status = NT_STATUS_UNEXPECTED_NETWORK_ERROR;
1396         }
1397
1398         smb_direct_connection_destructor(c);
1399 }
1400
1401 static void smb_direct_connection_rdma_handler(struct tevent_context *ev,
1402                                                struct tevent_fd *fde,
1403                                                uint16_t flags,
1404                                                void *private_data)
1405 {
1406         struct smb_direct_connection *c =
1407                 talloc_get_type_abort(private_data,
1408                 struct smb_direct_connection);
1409         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1410         int ret;
1411
1412         errno = 0;
1413
1414         ret = rdma_get_cm_event(c->rdma.cm_channel,
1415                                 &c->rdma.cm_event);
1416         if (ret != 0) {
1417                 status = map_nt_error_from_unix_common(errno);
1418                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1419                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1420                 smb_direct_connection_disconnect(c, status);
1421                 return;
1422         }
1423
1424         if (c->rdma.cm_event->status != 0) {
1425                 errno = c->rdma.cm_event->status;
1426                 status = map_nt_error_from_unix_common(errno);
1427                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1428                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1429                 smb_direct_connection_disconnect(c, status);
1430                 return;
1431         }
1432
1433         switch (c->rdma.cm_event->event) {
1434         case RDMA_CM_EVENT_DISCONNECTED:
1435                 status = NT_STATUS_CONNECTION_DISCONNECTED;
1436                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1437                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1438                 smb_direct_connection_disconnect(c, status);
1439                 return;
1440         case RDMA_CM_EVENT_ADDR_RESOLVED:
1441         case RDMA_CM_EVENT_ADDR_ERROR:
1442         case RDMA_CM_EVENT_ROUTE_RESOLVED:
1443         case RDMA_CM_EVENT_ESTABLISHED:
1444         case RDMA_CM_EVENT_ROUTE_ERROR:
1445         case RDMA_CM_EVENT_CONNECT_REQUEST:
1446         case RDMA_CM_EVENT_CONNECT_RESPONSE:
1447         case RDMA_CM_EVENT_CONNECT_ERROR:
1448         case RDMA_CM_EVENT_UNREACHABLE:
1449         case RDMA_CM_EVENT_REJECTED:
1450         case RDMA_CM_EVENT_DEVICE_REMOVAL:
1451         case RDMA_CM_EVENT_MULTICAST_JOIN:
1452         case RDMA_CM_EVENT_MULTICAST_ERROR:
1453         case RDMA_CM_EVENT_ADDR_CHANGE:
1454         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1455                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1456                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1457                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1458                 smb_direct_connection_disconnect(c, status);
1459                 return;
1460         }
1461
1462         status = NT_STATUS_INTERNAL_ERROR;
1463         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1464                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1465         smb_direct_connection_disconnect(c, status);
1466 }
1467
1468 static void smb_direct_connection_ibv_handler(struct tevent_context *ev,
1469                                               struct tevent_fd *fde,
1470                                               uint16_t fde_flags,
1471                                               void *private_data)
1472 {
1473         struct smb_direct_connection *c =
1474                 talloc_get_type_abort(private_data,
1475                 struct smb_direct_connection);
1476         struct ibv_cq *cq = NULL;
1477         void *cq_context = NULL;
1478         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1479         struct ibv_wc wc;
1480         int ret;
1481         uint16_t credits_requested;
1482         uint16_t credits_granted;
1483         uint16_t flags;
1484         uint32_t data_offset;
1485         struct smb_direct_io *io = NULL;
1486
1487         errno = 0;
1488         ret = ibv_get_cq_event(c->ibv.comp_channel,
1489                                &cq, &cq_context);
1490         if (ret != 0) {
1491                 status = map_nt_error_from_unix_common(errno);
1492                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1493                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1494                 smb_direct_connection_disconnect(c, status);
1495                 return;
1496         }
1497
1498         ibv_ack_cq_events(cq, 1);
1499
1500         if (cq_context != c) {
1501                 status = NT_STATUS_INTERNAL_ERROR;
1502                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1503                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1504                 smb_direct_connection_disconnect(c, status);
1505                 return;
1506         }
1507
1508         errno = 0;
1509         ret = ibv_req_notify_cq(cq, 0);
1510         if (ret != 0) {
1511                 status = map_nt_error_from_unix_common(errno);
1512                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1513                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1514                 smb_direct_connection_disconnect(c, status);
1515                 return;
1516         }
1517
1518         errno = 0;
1519         ZERO_STRUCT(wc);
1520         ret = ibv_poll_cq(cq, 1, &wc);
1521         if (ret != 1) {
1522                 status = map_nt_error_from_unix_common(errno);
1523                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1524                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1525                 smb_direct_connection_disconnect(c, status);
1526                 return;
1527         }
1528         ret = 0;
1529
1530         if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1531                 errno = wc.status;
1532                 status = map_nt_error_from_unix_common(errno);
1533                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1534                         __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1535                         ibv_wc_status_str(wc.status)));
1536                 TALLOC_FREE(c->ibv.fde_channel);
1537                 TALLOC_FREE(c->rdma.fde_channel);
1538                 smb_direct_connection_rdma_handler(ev, fde, 0 /* flags */, private_data);
1539                 return;
1540         }
1541         if (wc.status != IBV_WC_SUCCESS) {
1542                 errno = wc.status;
1543                 status = map_nt_error_from_unix_common(errno);
1544                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1545                         __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1546                         ibv_wc_status_str(wc.status)));
1547                 smb_direct_connection_disconnect(c, status);
1548                 return;
1549         }
1550
1551         io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1552                                    struct smb_direct_io);
1553
1554         switch (wc.opcode) {
1555         case IBV_WC_SEND:
1556                 DEBUG(0,("%s:%s: GOT SEND[%p] ret[%d] errno[%d]\n",
1557                         __location__, __FUNCTION__, io, ret, errno));
1558                 DLIST_REMOVE(c->s2r.posted, io);
1559                 DLIST_ADD_END(c->s2r.idle, io);
1560
1561                 errno = 0;
1562                 ret = smb_direct_connection_setup_readv(c);
1563                 if (ret != 0) {
1564                         status = map_nt_error_from_unix_common(errno);
1565                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1566                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1567                         smb_direct_connection_disconnect(c, status);
1568                         return;
1569                 }
1570                 return;
1571
1572         case IBV_WC_RECV:
1573                 DEBUG(0,("%s:%s: GOT RECV[%p] ret[%d] errno[%d]\n",
1574                         __location__, __FUNCTION__, io, ret, errno));
1575                 if (wc.byte_len >= c->state.max_receive_size) {
1576                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1577                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1578                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1579                         smb_direct_connection_disconnect(c, status);
1580                         return;
1581                 }
1582                 if (wc.byte_len < 0x14) {
1583                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1584                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1585                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1586                         smb_direct_connection_disconnect(c, status);
1587                         return;
1588                 }
1589                 DLIST_REMOVE(c->r2s.posted, io);
1590                 dump_data(0, io->smbd_hdr, MIN(wc.byte_len, sizeof(io->smbd_hdr)));
1591                 credits_requested = SVAL(io->smbd_hdr, 0x00);
1592                 if (credits_requested == 0) {
1593                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1594                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1595                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1596                         smb_direct_connection_disconnect(c, status);
1597                         return;
1598                 }
1599                 credits_granted = SVAL(io->smbd_hdr, 0x02);
1600                 if (credits_granted == 0) {
1601                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1602                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1603                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1604                         smb_direct_connection_disconnect(c, status);
1605                         return;
1606                 }
1607                 flags = SVAL(io->smbd_hdr, 0x04);
1608                 io->remaining_length = IVAL(io->smbd_hdr, 0x08);
1609                 data_offset = IVAL(io->smbd_hdr, 0x0C);
1610                 io->data_length = IVAL(io->smbd_hdr, 0x10);
1611
1612                 c->state.receive_credits -= 1;
1613                 c->state.receive_credit_target = credits_requested;
1614                 c->state.send_credits += credits_granted;
1615
1616                 if (data_offset == 0) {
1617                         if (wc.byte_len != 0x14) {
1618                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1619                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1620                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1621                                 smb_direct_connection_disconnect(c, status);
1622                                 return;
1623                         }
1624                         DLIST_ADD_END(c->r2s.idle, io);
1625                         errno = 0;
1626                         ret = smb_direct_connection_post_recv(c);
1627                         if (ret != 0) {
1628                                 status = map_nt_error_from_unix_common(errno);
1629                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1630                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1631                                 smb_direct_connection_disconnect(c, status);
1632                                 return;
1633                         }
1634                         return;
1635                 } else if (data_offset == 0x18) {
1636                         if (io->data_length >= (c->state.max_receive_size - data_offset)) {
1637                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1638                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1639                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1640                                 smb_direct_connection_disconnect(c, status);
1641                                 return;
1642                         }
1643                 } else {
1644                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1645                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1646                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1647                         smb_direct_connection_disconnect(c, status);
1648                         return;
1649                 }
1650
1651                 if (c->r2s.remaining_length > 0) {
1652                         if (io->data_length > c->r2s.remaining_length) {
1653                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1654                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1655                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1656                                 smb_direct_connection_disconnect(c, status);
1657                                 return;
1658                         }
1659
1660                         c->r2s.remaining_length -= io->data_length;
1661
1662                         if (io->remaining_length != c->r2s.remaining_length) {
1663                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1664                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1665                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1666                                 smb_direct_connection_disconnect(c, status);
1667                                 return;
1668                         }
1669
1670                         io->iov = io->_iov_array;
1671                         io->iov[0].iov_base = io->data;
1672                         io->iov[0].iov_len = io->data_length;
1673                         io->iov_count = 1;
1674                 } else {
1675                         uint64_t total_length = io->data_length + io->remaining_length;
1676
1677                         if (total_length >= c->state.max_fragmented_size) { //correct direction
1678                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1679                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1680                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1681                                 smb_direct_connection_disconnect(c, status);
1682                                 return;
1683                         }
1684
1685                         _smb_setlen_tcp(io->nbt_hdr, total_length);
1686                         io->iov = io->_iov_array;
1687                         io->iov[0].iov_base = io->nbt_hdr;
1688                         io->iov[0].iov_len = sizeof(io->nbt_hdr);
1689                         io->iov[1].iov_base = io->data;
1690                         io->iov[1].iov_len = io->data_length;
1691                         io->iov_count = 2;
1692                 }
1693
1694                 if (c->state.receive_credits == 0) {
1695                         // TODO: send more credits
1696                         status = map_nt_error_from_unix_common(errno);
1697                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1698                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1699                         smb_direct_connection_disconnect(c, status);
1700                         return;
1701                 }
1702
1703                 if (flags & ~0x0001) {
1704                         status = map_nt_error_from_unix_common(errno);
1705                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1706                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1707                         smb_direct_connection_disconnect(c, status);
1708                         return;
1709                 }
1710
1711                 if (flags & 0x0001) {
1712                         // TODO: send more credits
1713                         status = map_nt_error_from_unix_common(errno);
1714                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1715                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1716                         smb_direct_connection_disconnect(c, status);
1717                         return;
1718                 }
1719
1720                 DLIST_ADD_END(c->r2s.ready, io);
1721                 errno = 0;
1722                 ret = smb_direct_connection_setup_writev(c);
1723                 if (ret != 0) {
1724                         status = map_nt_error_from_unix_common(errno);
1725                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1726                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1727                         smb_direct_connection_disconnect(c, status);
1728                         return;
1729                 }
1730                 return;
1731
1732         case IBV_WC_RDMA_READ:
1733         case IBV_WC_RDMA_WRITE:
1734         default:
1735                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1736                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1737                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1738                 smb_direct_connection_disconnect(c, status);
1739                 return;
1740         }
1741 }
1742
1743 static void smb_direct_connection_sock_handler(struct tevent_context *ev,
1744                                                struct tevent_fd *fde,
1745                                                uint16_t fde_flags,
1746                                                void *private_data)
1747 {
1748         struct smb_direct_connection *c =
1749                 talloc_get_type_abort(private_data,
1750                 struct smb_direct_connection);
1751         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1752         struct smb_direct_io *io = NULL;
1753         int ret;
1754         bool ok;
1755
1756  do_write:
1757         if (fde_flags & TEVENT_FD_WRITE) {
1758                 if (c->r2s.out != NULL) {
1759                         io = c->r2s.out;
1760                 } else {
1761                         io = c->r2s.ready;
1762                         DLIST_REMOVE(c->r2s.ready, io);
1763                         c->r2s.out = io;
1764                 }
1765
1766                 if (io != NULL) {
1767                         ssize_t sret;
1768
1769                         sret = writev(c->sock.fd, io->iov, io->iov_count);
1770                         if (sret == -1) {
1771                                 if (errno == EAGAIN) { // and more...
1772                                         TEVENT_FD_WRITEABLE(c->sock.fde);
1773                                         goto done_write;
1774                                 }
1775
1776                                 status = map_nt_error_from_unix_common(errno);
1777                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1778                                         __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1779                                 smb_direct_connection_disconnect(c, status);
1780                                 return;
1781                         }
1782
1783                         ok = iov_advance(&io->iov, &io->iov_count, sret);
1784                         if (!ok) {
1785                                 status = map_nt_error_from_unix_common(errno);
1786                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1787                                         __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1788                                 smb_direct_connection_disconnect(c, status);
1789                                 return;
1790                         }
1791
1792                         if (io->iov_count == 0) {
1793                                 c->r2s.out = NULL;
1794                                 DLIST_ADD_END(c->r2s.idle, io);
1795                                 goto do_write;
1796                         }
1797                 } else {
1798                         TEVENT_FD_NOT_WRITEABLE(c->sock.fde);
1799                 }
1800         }
1801
1802  done_write:
1803         ret = smb_direct_connection_post_recv(c);
1804         if (ret != 0) {
1805                 status = map_nt_error_from_unix_common(errno);
1806                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1807                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1808                 smb_direct_connection_disconnect(c, status);
1809                 return;
1810         }
1811
1812  do_read:
1813         if (fde_flags & TEVENT_FD_READ) {
1814                 if (c->s2r.in != NULL) {
1815                         io = c->s2r.in;
1816                 } else {
1817                         io = c->s2r.idle;
1818                         DLIST_REMOVE(c->s2r.idle, io);
1819                         c->s2r.in = io;
1820
1821                         if (c->s2r.remaining_length > 0) {
1822                                 /*
1823                                  * We need to continue to get
1824                                  * the incomplete packet.
1825                                  */
1826                                 io->data_length = MIN(sizeof(io->data),
1827                                                       c->s2r.remaining_length);
1828                                 io->remaining_length = c->s2r.remaining_length;
1829                                 io->remaining_length -= io->data_length;
1830                                 c->s2r.remaining_length = io->remaining_length;
1831
1832                                 io->iov = io->_iov_array;
1833                                 io->iov[0].iov_base = io->data;
1834                                 io->iov[0].iov_len = io->data_length;
1835                                 io->iov_count = 1;
1836                         } else {
1837                                 /*
1838                                  * For a new packet we need to get the length
1839                                  * first.
1840                                  */
1841                                 io->data_length = 0;
1842                                 io->remaining_length = 0;
1843
1844                                 io->iov = io->_iov_array;
1845                                 io->iov[0].iov_base = io->nbt_hdr;
1846                                 io->iov[0].iov_len = sizeof(io->nbt_hdr);
1847                                 io->iov_count = 1;
1848                         }
1849                 }
1850                 if (io != NULL) {
1851                         ssize_t sret;
1852
1853                         sret = readv(c->sock.fd, io->iov, io->iov_count);
1854                         if (sret == -1) {
1855                                 if (errno == EAGAIN) { // and more...
1856                                         TEVENT_FD_READABLE(c->sock.fde);
1857                                         goto do_read;
1858                                 }
1859
1860                                 status = map_nt_error_from_unix_common(errno);
1861                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1862                                         __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1863                                 smb_direct_connection_disconnect(c, status);
1864                                 return;
1865                         }
1866                         if (sret == 0) {
1867                                 status = NT_STATUS_CONNECTION_DISCONNECTED;
1868                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1869                                         __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1870                                 smb_direct_connection_disconnect(c, status);
1871                                 return;
1872                         }
1873
1874                         ok = iov_advance(&io->iov, &io->iov_count, sret);
1875                         if (!ok) {
1876                                 status = map_nt_error_from_unix_common(errno);
1877                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1878                                         __location__, __FUNCTION__, (int)sret, errno, nt_errstr(status)));
1879                                 smb_direct_connection_disconnect(c, status);
1880                                 return;
1881                         }
1882
1883                         if (io->iov_count == 0) {
1884                                 if (io->data_length != 0) {
1885                                         /*
1886                                          * We managed to read the whole fragment
1887                                          * which is ready to be posted into the
1888                                          * send queue.
1889                                          */
1890                                         c->s2r.in = NULL;
1891                                         DLIST_ADD_END(c->r2s.ready, io);
1892                                         goto do_read;
1893                                 }
1894
1895                                 c->s2r.remaining_length = smb_len_tcp(io->nbt_hdr);
1896                                 if (c->s2r.remaining_length > c->state.max_fragmented_size) { //correct direction
1897                                         status = NT_STATUS_INVALID_BUFFER_SIZE;
1898                                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1899                                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1900                                         smb_direct_connection_disconnect(c, status);
1901                                         return;
1902                                 }
1903
1904                                 io->data_length = MIN(sizeof(io->data),
1905                                                       c->s2r.remaining_length);
1906                                 io->remaining_length = c->s2r.remaining_length;
1907                                 io->remaining_length -= io->data_length;
1908                                 c->s2r.remaining_length = io->remaining_length;
1909
1910                                 io->iov = io->_iov_array;
1911                                 io->iov[0].iov_base = io->data;
1912                                 io->iov[0].iov_len = io->data_length;
1913                                 io->iov_count = 1;
1914
1915                                 /*
1916                                  * try to read the reset immediately.
1917                                  */
1918                                 goto do_read;
1919                         }
1920                 } else {
1921                         TEVENT_FD_NOT_READABLE(c->sock.fde);
1922                 }
1923         }
1924
1925         ret = smb_direct_connection_post_send(c);
1926         if (ret != 0) {
1927                 status = map_nt_error_from_unix_common(errno);
1928                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1929                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1930                 smb_direct_connection_disconnect(c, status);
1931                 return;
1932         }
1933 }
1934
1935 NTSTATUS smb_direct_connection_setup_events(struct smb_direct_connection *c,
1936                                             struct tevent_context *ev)
1937 {
1938         uint16_t sock_fde_flags = TEVENT_FD_READ;
1939
1940         if (c->r2s.out != NULL) {
1941                 sock_fde_flags |= TEVENT_FD_WRITE;
1942         }
1943
1944         if (tevent_fd_get_flags(c->ibv.fde_channel) == 0) {
1945                 c->last_ev = NULL;
1946                 TALLOC_FREE(c->sock.fde);
1947                 TALLOC_FREE(c->ibv.fde_channel);
1948                 TALLOC_FREE(c->rdma.fde_channel);
1949         }
1950
1951         if (tevent_fd_get_flags(c->rdma.fde_channel) == 0) {
1952                 c->last_ev = NULL;
1953                 TALLOC_FREE(c->sock.fde);
1954                 TALLOC_FREE(c->ibv.fde_channel);
1955                 TALLOC_FREE(c->rdma.fde_channel);
1956         }
1957
1958         if (ev == NULL) {
1959                 c->last_ev = NULL;
1960                 TALLOC_FREE(c->sock.fde);
1961                 TALLOC_FREE(c->ibv.fde_channel);
1962                 TALLOC_FREE(c->rdma.fde_channel);
1963         } else if (ev == c->last_ev) {
1964                 return NT_STATUS_OK;
1965         } else if (c->last_ev == NULL) {
1966                 /* fallthrough */
1967         } else {
1968                 return NT_STATUS_INVALID_PARAMETER_MIX;
1969         }
1970
1971         c->rdma.fde_channel = tevent_add_fd(ev, c,
1972                                             c->rdma.cm_channel->fd,
1973                                             TEVENT_FD_READ,
1974                                             smb_direct_connection_rdma_handler,
1975                                             c);
1976         if (c->rdma.fde_channel == NULL) {
1977                 return NT_STATUS_NO_MEMORY;
1978         }
1979         c->ibv.fde_channel = tevent_add_fd(ev, c,
1980                                            c->ibv.comp_channel->fd,
1981                                            TEVENT_FD_READ,
1982                                            smb_direct_connection_ibv_handler,
1983                                            c);
1984         if (c->ibv.fde_channel == NULL) {
1985                 TALLOC_FREE(c->rdma.fde_channel);
1986                 return NT_STATUS_NO_MEMORY;
1987         }
1988         c->sock.fde = tevent_add_fd(ev, c, c->sock.fd,
1989                                     sock_fde_flags,
1990                                     smb_direct_connection_sock_handler,
1991                                     c);
1992         if (c->sock.fde == NULL) {
1993                 TALLOC_FREE(c->rdma.fde_channel);
1994                 TALLOC_FREE(c->ibv.fde_channel);
1995                 return NT_STATUS_NO_MEMORY;
1996         }
1997
1998         c->last_ev = ev;
1999         return NT_STATUS_OK;
2000 }
2001
2002 #endif /* SMB_TRANSPORT_ENABLE_RDMA */