quiet...
[metze/samba/wip.git] / libcli / smb / smb_direct.c
1 /*
2    Unix SMB/CIFS implementation.
3    Infrastructure for SMB-Direct RDMA as transport
4    Copyright (C) Stefan Metzmacher 2012,2016
5
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "includes.h"
21 #include "system/network.h"
22 #include <tevent.h>
23 #include "lib/util/tevent_ntstatus.h"
24 #include "lib/tsocket/tsocket.h"
25 #include "lib/util/util_net.h"
26 #include "libcli/smb/smb_common.h"
27 #include "libcli/smb/smb_direct.h"
28 #include "lib/util/dlinklist.h"
29 #include "lib/util/iov_buf.h"
30 #include "librpc/ndr/libndr.h"
31
32 #ifdef SMB_TRANSPORT_ENABLE_RDMA
33 #include <rdma/rdma_cma_abi.h>
34 #include <rdma/rdma_cma.h>
35 #include <infiniband/verbs.h>
36
/*
 * Listener backlog.
 * NOTE(review): no use of this constant is visible in this part of the
 * file -- presumably passed to rdma_listen(); confirm.
 */
#define SMB_DIRECT_LISTENER_BACKLOG 5

/*
 * Size in bytes of the payload buffer embedded in every
 * struct smb_direct_io (the "data" member); also used as the initial
 * state.max_receive_size of a new connection.
 */
//#define SMB_DIRECT_IO_MAX_DATA 8192
#define SMB_DIRECT_IO_MAX_DATA 1364
/*
 * Size of the fixed data-transfer header: the fields written by
 * smb_direct_connection_post_send() occupy offsets 0x00..0x13.
 * This is the length sent when a message carries no payload.
 */
#define SMB_DIRECT_DATA_MIN_HDR_SIZE 0x14
/*
 * Header size including padding (NDR_ROUND() to a multiple of 8).
 * Used as the size of smb_direct_io.smbd_hdr and as the data offset
 * announced in the header when a payload is present.
 */
#define SMB_DIRECT_DATA_OFFSET NDR_ROUND(SMB_DIRECT_DATA_MIN_HDR_SIZE, 8)

/*
 * Header flag: set by smb_direct_connection_post_send() on the message
 * that consumes our last send credit.
 */
#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001

/* Forward declarations; the structures are defined below. */
struct smb_direct_listener;
struct smb_direct_connection;
struct smb_direct_io;
49
/*
 * State of an RDMA listener.
 *
 * NOTE(review): the code that fills and drains this structure is not in
 * the visible part of the file; the per-field notes below are limited to
 * what can be seen here.
 */
struct smb_direct_listener {
        /* RDMA connection-manager (librdmacm) state of the listener. */
        struct {
                struct smb_direct_rdma_context context;
                struct rdma_cm_id *cm_id;
                struct rdma_event_channel *cm_channel;
                struct tevent_fd *fde_channel;
                enum rdma_cm_event_type expected_event;
                /*
                 * We fetch events from the ready queue and store it
                 * here, it's acked in the listener destructor.
                 */
                struct rdma_cm_event *cm_event;
        } rdma;
        /* presumably connections still being set up -- TODO confirm */
        struct smb_direct_connection *pending;
        /*
         * List of connections linked via their next/prev members;
         * smb_direct_connection_destructor() removes a connection from
         * this list while its "l" back pointer is still set.
         */
        struct smb_direct_connection *ready;
};
66
/*
 * One SMB-Direct connection: a socketpair towards the caller on one side
 * and an RDMA queue pair on the other side, plus the lists of I/O
 * descriptors travelling in each direction.
 *
 * Created by smb_direct_connection_create(); all resources are released
 * by smb_direct_connection_destructor().
 */
struct smb_direct_connection {
        struct smb_direct_connection *next, *prev; /* used in the listener ready list */
        struct smb_direct_listener *l; /* only valid before fully accepted */

        const void *last_ev;
        /*
         * Negotiated limits and credit accounting.
         * The initial values are set in smb_direct_connection_create().
         */
        struct {
                uint32_t max_send_size;
                uint32_t max_receive_size;
                uint32_t max_fragmented_size;
                uint32_t max_read_write_size;
                /* written into every outgoing header; number of s2r ios allocated */
                uint16_t send_credit_target;
                /* credits we may still spend; one is consumed per posted send */
                uint16_t send_credits;
                /* number of r2s ios allocated; also the QP's max_recv_wr */
                uint16_t receive_credit_max;
                /* number of receive work requests handed to the QP */
                uint16_t receive_posted;
                /* upper bound for the credits granted in a single message */
                uint16_t receive_credit_target;
                /* credits granted to the peer so far */
                uint16_t receive_credits;
                /* NOTE(review): presumably "interval" (initialised to 5) -- confirm unit */
                uint32_t keep_alive_internal;
                /* requests an (empty) message from smb_direct_connection_post_io() */
                bool send_immediate;
        } state;
        /* Both ends of the socketpair; -1 when not open. */
        struct {
                int tmp_fd; /* given to the caller end */
                int fd;
                struct tevent_fd *fde;
        } sock;
        /* RDMA connection-manager (librdmacm) state. */
        struct {
                struct smb_direct_rdma_context context;
                struct rdma_event_channel *cm_channel;
                struct rdma_cm_id *cm_id;
                struct tevent_fd *fde_channel;
                enum rdma_cm_event_type expected_event;
                /*
                 * We fetch events from the ready queue and store it
                 * here, it's acked in the connection destructor.
                 */
                struct rdma_cm_event *cm_event;
                struct rdma_conn_param conn_param;
                uint8_t ird_ord_hdr[8];
        } rdma;
        /*
         * Verbs (libibverbs) objects, allocated by
         * smb_direct_connection_complete_alloc().
         */
        struct {
                struct ibv_pd *pd;
                struct ibv_comp_channel *comp_channel;
                struct tevent_fd *fde_channel;
                struct ibv_cq *send_cq;
                struct ibv_cq *recv_cq;
                struct ibv_qp *qp;
                struct ibv_qp_init_attr init_attr;
        } ibv;

        /* talloc parent of every struct smb_direct_io of this connection */
        TALLOC_CTX *io_mem_ctx;
        struct {
                /*
                 * here we have io coming into
                 * the rdma layer, which needs to
                 * be flushed to the socketpair
                 */
                struct smb_direct_io *idle;
                struct smb_direct_io *posted;
                struct smb_direct_io *ready;
                struct smb_direct_io *out;
                uint32_t remaining_length;
        } r2s;
        struct {
                /*
                 * here we have io coming from the socketpair
                 * which needs to be flushed into the rdma layer.
                 */
                struct smb_direct_io *idle;
                struct smb_direct_io *posted;
                struct smb_direct_io *ready;
                struct smb_direct_io *in;
                uint32_t remaining_length;
        } s2r;
};
140
/*
 * NOTE(review): file-scope connection pointer with external linkage.
 * Nothing in the visible part of this file reads or writes it -- looks
 * like a debugging aid; confirm whether it can be removed or made static.
 */
struct smb_direct_connection *smb_direct_conn;
142
143
/*
 * One I/O descriptor with embedded header and payload buffers.
 *
 * Allocated by smb_direct_io_create(), which registers smbd_hdr and data
 * as memory regions and pre-fills both work requests; the regions are
 * deregistered by smb_direct_io_destructor().
 */
struct smb_direct_io {
        /* linkage for the idle/posted/ready lists of the connection */
        struct smb_direct_io *prev, *next;

        /* memory registrations of smbd_hdr and data */
        struct ibv_mr *hdr_mr;
        struct ibv_mr *data_mr;
        /* sge[0] describes smbd_hdr, sge[1] describes data */
        struct ibv_sge sge[2];

        /* both work requests point at sge[] and carry this io as wr_id */
        struct ibv_recv_wr recv_wr;
        struct ibv_send_wr send_wr;

        /* NOTE(review): iov members are not used in the visible code */
        struct iovec _iov_array[2];
        struct iovec *iov;
        int iov_count;

        /* payload length / remaining length as written into the header */
        uint32_t data_length;
        uint32_t remaining_length;

        /* NOTE(review): nbt_hdr is not used in the visible code */
        uint8_t nbt_hdr[NBT_HDR_SIZE];
        uint8_t smbd_hdr[SMB_DIRECT_DATA_OFFSET];
        uint8_t data[SMB_DIRECT_IO_MAX_DATA];
};
165
166 static int smb_direct_io_destructor(struct smb_direct_io *io);
167
168 static struct smb_direct_io *smb_direct_io_create(struct smb_direct_connection *c)
169 {
170         struct smb_direct_io *io;
171
172         if (c->io_mem_ctx == NULL) {
173                 return NULL;
174         }
175
176         io = talloc_zero(c->io_mem_ctx, struct smb_direct_io);
177         if (io == NULL) {
178                 return NULL;
179         }
180         talloc_set_destructor(io, smb_direct_io_destructor);
181
182         io->hdr_mr = ibv_reg_mr(c->ibv.pd,
183                                 io->smbd_hdr,
184                                 sizeof(io->smbd_hdr),
185                                 IBV_ACCESS_LOCAL_WRITE);
186         if (io->hdr_mr == NULL) {
187                 TALLOC_FREE(io);
188                 return NULL;
189         }
190
191         io->data_mr = ibv_reg_mr(c->ibv.pd,
192                                  io->data,
193                                  sizeof(io->data),
194                                  IBV_ACCESS_LOCAL_WRITE);
195         if (io->data_mr == NULL) {
196                 TALLOC_FREE(io);
197                 return NULL;
198         }
199
200         io->sge[0].addr = (uint64_t) (uintptr_t) io->smbd_hdr;
201         io->sge[0].length = sizeof(io->smbd_hdr);
202         io->sge[0].lkey = io->hdr_mr->lkey;
203         io->sge[1].addr = (uint64_t) (uintptr_t) io->data;
204         io->sge[1].length = sizeof(io->data);
205         io->sge[1].lkey = io->data_mr->lkey;
206
207         io->send_wr.wr_id = (uint64_t) (uintptr_t) io;
208         io->send_wr.opcode = IBV_WR_SEND;
209         io->send_wr.send_flags = IBV_SEND_SIGNALED;
210         io->send_wr.sg_list = io->sge;
211         io->send_wr.num_sge = ARRAY_SIZE(io->sge);
212
213         io->recv_wr.wr_id = (uint64_t) (uintptr_t) io;
214         io->recv_wr.sg_list = io->sge;
215         io->recv_wr.num_sge = ARRAY_SIZE(io->sge);
216
217         return io;
218 }
219
220 static int smb_direct_io_destructor(struct smb_direct_io *io)
221 {
222         if (io->hdr_mr != NULL) {
223                 ibv_dereg_mr(io->hdr_mr);
224                 io->hdr_mr = NULL;
225         }
226
227         if (io->data_mr != NULL) {
228                 ibv_dereg_mr(io->data_mr);
229                 io->data_mr = NULL;
230         }
231
232         return 0;
233 }
234
static int smb_direct_connection_destructor(struct smb_direct_connection *c);

/*
 * Development override of DEBUG() for the rest of this file:
 * the _level argument given by the caller is ignored and every message
 * is emitted at debug level 5 instead (so "DEBUG(0, ...)" below is
 * really a level 5 message).
 *
 * NOTE(review): looks like temporary debugging scaffolding -- switch the
 * "#if 1" to "#if 0" (or drop the block) to get the regular DEBUG()
 * semantics back.
 */
#if 1
#undef DEBUG
#define DEBUG( _level, body ) do {\
        int level = 5; \
  (void)( ((level) <= MAX_DEBUG_LEVEL) && \
          unlikely(DEBUGLEVEL_CLASS[ DBGC_CLASS ] >= (level))           \
       && (dbghdrclass( level, DBGC_CLASS, __location__, __FUNCTION__ )) \
       && (dbgtext body) ); \
} while(0)
#endif
247 struct smb_direct_connection *smb_direct_connection_create(TALLOC_CTX *mem_ctx)
248 {
249         struct smb_direct_connection *c;
250         int sfd[2];
251         int ret;
252         //uint16_t i;
253
254         c = talloc_zero(mem_ctx, struct smb_direct_connection);
255         if (c == NULL) {
256                 return NULL;
257         }
258         c->sock.fd = -1;
259         c->sock.tmp_fd = -1;
260 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
261         talloc_set_destructor(c, smb_direct_connection_destructor);
262
263         c->state.max_send_size       = 1364;
264         c->state.max_receive_size    = SMB_DIRECT_IO_MAX_DATA;
265         c->state.max_fragmented_size = 1048576;
266         c->state.max_read_write_size = 1048576;
267         c->state.receive_credit_max  = 10;//255;
268         c->state.send_credit_target  = 255;
269         c->state.keep_alive_internal = 5;
270
271         ret = socketpair(AF_UNIX, SOCK_STREAM, 0, sfd);
272         if (ret == -1) {
273                 int saved_errno = errno;
274                 TALLOC_FREE(c);
275 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
276                 errno = saved_errno;
277                 return NULL;
278         }
279         c->sock.tmp_fd = sfd[0];
280         c->sock.fd = sfd[1];
281
282 DEBUG(0,("%s:%s: sock.fd[%d] sock.tmp_fd[%d]\n",
283         __location__, __func__, c->sock.fd, c->sock.tmp_fd));
284
285         smb_set_close_on_exec(c->sock.tmp_fd);
286         smb_set_close_on_exec(c->sock.fd);
287         set_blocking(c->sock.fd, false);
288         set_blocking(c->sock.tmp_fd, false);
289
290         c->rdma.cm_channel = rdma_create_event_channel();
291         if (c->rdma.cm_channel == NULL) {
292                 TALLOC_FREE(c);
293 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
294                 return NULL;
295         }
296         smb_set_close_on_exec(c->rdma.cm_channel->fd);
297         //set_blocking(c->rdma.cm_channel->fd, false);
298
299         c->rdma.context.c = c;
300
301         c->ibv.init_attr.cap.max_send_wr = c->state.send_credit_target; // more for RDMA READ/WRITE??
302         c->ibv.init_attr.cap.max_recv_wr = c->state.receive_credit_max; // more for RDMA READ/WRITE??
303         c->ibv.init_attr.cap.max_recv_sge = 2;
304         c->ibv.init_attr.cap.max_send_sge = 2;
305         c->ibv.init_attr.qp_type = IBV_QPT_RC;
306         c->ibv.init_attr.sq_sig_all = 1;
307
308 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
309         return c;
310 }
311
312 static NTSTATUS smb_direct_connection_complete_alloc(struct smb_direct_connection *c)
313 {
314         int ret;
315         uint16_t i;
316
317         c->ibv.comp_channel = ibv_create_comp_channel(c->rdma.cm_id->verbs);
318         if (c->ibv.comp_channel == NULL) {
319 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
320                 return NT_STATUS_NO_MEMORY;
321         }
322         smb_set_close_on_exec(c->ibv.comp_channel->fd);
323         //set_blocking(c->ibv.comp_channel->fd, false);
324
325 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
326         c->ibv.pd = ibv_alloc_pd(c->rdma.cm_id->verbs);
327         if (c->ibv.pd == NULL) {
328 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
329                 return NT_STATUS_NO_MEMORY;
330         }
331
332         c->ibv.send_cq = ibv_create_cq(c->rdma.cm_id->verbs,
333                                        c->ibv.init_attr.cap.max_send_wr,
334                                        c, c->ibv.comp_channel, 0);
335         if (c->ibv.send_cq == NULL) {
336 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
337                 return NT_STATUS_NO_MEMORY;
338         }
339         c->ibv.init_attr.send_cq = c->ibv.send_cq;
340
341         c->ibv.recv_cq = ibv_create_cq(c->rdma.cm_id->verbs,
342                                        c->ibv.init_attr.cap.max_recv_wr,
343                                        c, c->ibv.comp_channel, 0);
344         if (c->ibv.recv_cq == NULL) {
345 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
346                 return NT_STATUS_NO_MEMORY;
347         }
348         c->ibv.init_attr.recv_cq = c->ibv.recv_cq;
349
350         ret = ibv_req_notify_cq(c->ibv.send_cq, 0);
351         if (ret != 0) {
352 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
353         }
354
355         ret = ibv_req_notify_cq(c->ibv.recv_cq, 0);
356         if (ret != 0) {
357 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
358                 return NT_STATUS_NO_MEMORY;
359         }
360
361         ret = rdma_create_qp(c->rdma.cm_id, c->ibv.pd, &c->ibv.init_attr);
362         if (ret != 0) {
363 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
364                 return NT_STATUS_NO_MEMORY;
365         }
366         c->ibv.qp = c->rdma.cm_id->qp;
367
368         c->io_mem_ctx = talloc_named_const(c, 0, "io_mem_ctx");
369         if (c->io_mem_ctx == NULL) {
370 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
371                 return NT_STATUS_NO_MEMORY;
372         }
373
374         for (i = 0; i < c->state.receive_credit_max; i++) {
375                 struct smb_direct_io *io;
376
377                 io = smb_direct_io_create(c);
378                 if (io == NULL) {
379 DEBUG(0,("%s:%s: SETUP r2s here...\n", __location__, __func__));
380                         return NT_STATUS_NO_MEMORY;
381                 }
382                 DLIST_ADD_END(c->r2s.idle, io);
383 //DEBUG(0,("%s:%s: SETUP r2s here...\n", __location__, __func__));
384         }
385
386         for (i = 0; i < c->state.send_credit_target; i++) {
387                 struct smb_direct_io *io;
388
389                 io = smb_direct_io_create(c);
390                 if (io == NULL) {
391 DEBUG(0,("%s:%s: SETUP s2r here...\n", __location__, __func__));
392                         return NT_STATUS_NO_MEMORY;
393                 }
394                 DLIST_ADD_END(c->s2r.idle, io);
395 //DEBUG(0,("%s:%s: SETUP s2r here...\n", __location__, __func__));
396         }
397
398 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
399         return NT_STATUS_OK;
400 }
401
402 static void smb_direct_connection_debug_credits(struct smb_direct_connection *c,
403                                                 const char *reason,
404                                                 const struct smb_direct_io *io,
405                                                 const char *location, const char *func)
406 {
407         DEBUG(0,("%s:%s: IO[%p] CREDITS: RMAX[%u] RPOSTED[%u] RTARGET[%u] R[%u] RSIZE[%u] "
408                  "STARGET[%u] S[%u] SSIZE[%u] MF[%u] MRW[%u]\n",
409                 location, reason, io, //"",//func,
410                 c->state.receive_credit_max,
411                 c->state.receive_posted,
412                 c->state.receive_credit_target,
413                 c->state.receive_credits,
414                 c->state.max_receive_size,
415                 c->state.send_credit_target,
416                 c->state.send_credits,
417                 c->state.max_send_size,
418                 c->state.max_fragmented_size,
419                 c->state.max_read_write_size));
420 }
421
422 static int smb_direct_connection_destructor(struct smb_direct_connection *c)
423 {
424         if (c->l != NULL) {
425                 DLIST_REMOVE(c->l->ready, c);
426                 c->l = NULL;
427         }
428
429         TALLOC_FREE(c->sock.fde);
430
431         if (c->sock.fd != -1) {
432                 close(c->sock.fd);
433                 c->sock.fd = -1;
434         }
435
436         if (c->sock.tmp_fd != -1) {
437                 close(c->sock.tmp_fd);
438                 c->sock.tmp_fd = -1;
439         }
440
441         TALLOC_FREE(c->ibv.fde_channel);
442         TALLOC_FREE(c->rdma.fde_channel);
443
444         TALLOC_FREE(c->io_mem_ctx);
445         ZERO_STRUCT(c->r2s);
446         ZERO_STRUCT(c->s2r);
447
448         if (c->rdma.cm_event != NULL) {
449                 rdma_ack_cm_event(c->rdma.cm_event);
450                 c->rdma.cm_event = NULL;
451         }
452
453         if (c->ibv.qp != NULL) {
454                 ibv_destroy_qp(c->ibv.qp);
455                 c->ibv.qp = NULL;
456         }
457
458         if (c->ibv.send_cq != NULL) {
459                 ibv_destroy_cq(c->ibv.send_cq);
460                 c->ibv.send_cq = NULL;
461         }
462
463         if (c->ibv.recv_cq != NULL) {
464                 ibv_destroy_cq(c->ibv.recv_cq);
465                 c->ibv.recv_cq = NULL;
466         }
467
468         if (c->ibv.comp_channel != NULL) {
469                 ibv_destroy_comp_channel(c->ibv.comp_channel);
470                 c->ibv.comp_channel = NULL;
471         }
472
473         if (c->ibv.pd != NULL) {
474                 ibv_dealloc_pd(c->ibv.pd);
475                 c->ibv.pd = NULL;
476         }
477
478         if (c->rdma.cm_id != NULL) {
479                 rdma_destroy_id(c->rdma.cm_id);
480                 c->rdma.cm_id = NULL;
481         }
482
483         if (c->rdma.cm_channel != NULL) {
484                 rdma_destroy_event_channel(c->rdma.cm_channel);
485                 c->rdma.cm_channel = NULL;
486         }
487
488         return 0;
489 }
490
491 static int smb_direct_connection_post_recv(struct smb_direct_connection *c)
492 {
493         struct smb_direct_io *io = NULL;
494         struct ibv_recv_wr *bad_recv_wr = NULL;
495         int ret;
496
497 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
498 smb_direct_connection_debug_credits(c, "post_recv", NULL, __location__, __func__);
499         if (c->r2s.idle == NULL) {
500 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
501                 return 0;
502         }
503
504         for (io = c->r2s.idle; io != NULL; io = io->next) {
505 //DEBUG(0,("%s:%s: POST RECV[%p]\n", __location__, __func__, io));
506                 c->state.receive_posted += 1;
507 smb_direct_connection_debug_credits(c, "POST_RECV", io, __location__, __func__);
508                 if (io->next == NULL) {
509                         io->recv_wr.next = NULL;
510                         break;
511                 }
512
513                 io->recv_wr.next = &io->next->recv_wr;
514         }
515
516         errno = 0;
517         ret = ibv_post_recv(c->ibv.qp, &c->r2s.idle->recv_wr, &bad_recv_wr);
518         if (ret != 0) {
519                 NTSTATUS status;
520                 status = map_nt_error_from_unix_common(errno);
521                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
522                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
523                 return ret;
524         }
525
526         DLIST_CONCATENATE(c->r2s.posted, c->r2s.idle);
527         c->r2s.idle = NULL;
528
529 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
530         return 0;
531 }
532
533 static int smb_direct_connection_post_send(struct smb_direct_connection *c)
534 {
535         struct smb_direct_io *io = NULL;
536         struct smb_direct_io *next = NULL;
537         struct smb_direct_io *posted = NULL;
538         struct smb_direct_io *last = NULL;
539         struct ibv_send_wr *bad_send_wr = NULL;
540         int ret;
541
542 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
543 smb_direct_connection_debug_credits(c, "post_send", NULL, __location__, __func__);
544         if (c->s2r.ready == NULL) {
545 DEBUG(0,("%s:%s: none READY\n", __location__, __func__));
546                 return 0;
547         }
548
549         if (c->state.send_credits == 0) {
550 DEBUG(0,("%s:%s: no credits\n", __location__, __func__));
551                 return 0;
552         }
553
554 #if 0
555         if (1) {
556 DEBUG(0,("%s:%s: TODO: REMOVE me ...here...\n", __location__, __func__));
557                 return 0;
558         }
559 #endif
560
561         for (io = c->s2r.ready; io != NULL; io = next) {
562                 uint16_t granted = 0;
563                 uint16_t flags = 0;
564                 uint32_t data_offset = 0;
565
566                 next = io->next;
567
568                 if (c->state.send_credits == 0) {
569 smb_direct_connection_debug_credits(c, "WAIT_FOR_POST_SEND", io, __location__, __func__);
570                         break;
571                 }
572
573                 c->state.send_credits -= 1;
574
575                 if (c->state.send_credits == 0) {
576                         flags |= SMB_DIRECT_RESPONSE_REQUESTED;
577                 }
578
579                 granted = c->state.receive_posted;
580                 granted -= c->state.receive_credits;
581                 granted = MIN(granted, c->state.receive_credit_target);
582                 c->state.receive_credits += granted;
583 smb_direct_connection_debug_credits(c, "POST_SEND", io, __location__, __func__);
584
585 //              if (c->state.receive_credits == 0) {
586 //                      granted = 1;
587 //                      c->state.receive_credits += granted;
588 //              }
589
590                 io->send_wr.sg_list = io->sge;
591                 if (io->data_length > 0) {
592                         data_offset = SMB_DIRECT_DATA_OFFSET;
593                         io->sge[0].length = data_offset;
594                         io->sge[1].length = io->data_length;
595                         io->send_wr.num_sge = 2;
596                 } else {
597                         io->sge[0].length = SMB_DIRECT_DATA_MIN_HDR_SIZE;
598                         io->send_wr.num_sge = 1;
599                 }
600
601                 SSVAL(io->smbd_hdr, 0x00, c->state.send_credit_target);
602                 SSVAL(io->smbd_hdr, 0x02, granted);
603                 SSVAL(io->smbd_hdr, 0x04, flags);
604                 SSVAL(io->smbd_hdr, 0x06, 0x0000);
605                 SIVAL(io->smbd_hdr, 0x08, io->remaining_length);
606                 SIVAL(io->smbd_hdr, 0x0C, data_offset);
607                 SIVAL(io->smbd_hdr, 0x10, io->data_length);
608                 SIVAL(io->smbd_hdr, 0x14, 0x00000000);
609
610                 if (next != NULL) {
611                         io->send_wr.next = &next->send_wr;
612                 } else {
613                         io->send_wr.next = NULL;
614                 }
615                         io->send_wr.next = NULL;
616                 DLIST_REMOVE(c->s2r.ready, io);
617                 DLIST_ADD_END(posted, io);
618                 DEBUG(0,("%s:%s: POST SEND[%p] data_length[%u] remaining_length[%u]\n",
619                         __location__, __FUNCTION__, io,
620                         (unsigned)io->data_length, (unsigned)io->remaining_length));
621         errno = 0;
622         ret = ibv_post_send(c->ibv.qp, &io->send_wr, &bad_send_wr);
623         if (ret != 0) {
624                 NTSTATUS status;
625         //      DLIST_CONCATENATE(c->s2r.ready, posted); // TODO: check bad_send_wr
626                 status = map_nt_error_from_unix_common(errno);
627                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
628                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
629                 return ret;
630         }
631         }
632
633         last = DLIST_TAIL(posted);
634         last->send_wr.next = NULL;
635
636         //errno = 0;
637         //ret = ibv_post_send(c->ibv.qp, &posted->send_wr, &bad_send_wr);
638         //if (ret != 0) {
639         //      NTSTATUS status;
640         ////    DLIST_CONCATENATE(c->s2r.ready, posted); // TODO: check bad_send_wr
641         //      status = map_nt_error_from_unix_common(errno);
642         //      DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
643         //              __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
644         //      return ret;
645         //}
646
647         DLIST_CONCATENATE(c->s2r.posted, posted);
648
649         return 0;
650 }
/*
 * Disabled (#if 0) earlier variant of smb_direct_connection_post_io():
 * it only posts receives and then sends, without the keepalive handling
 * of the active definition that follows.  Never compiled.
 */
#if 0
static int smb_direct_connection_post_io(struct smb_direct_connection *c)
{
        int ret;

        errno = 0;
        ret = smb_direct_connection_post_recv(c);
        if (ret != 0) {
                NTSTATUS status;
                status = map_nt_error_from_unix_common(errno);
                DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
                        __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
                return ret;
        }

        errno = 0;
        ret = smb_direct_connection_post_send(c);
        if (ret != 0) {
                NTSTATUS status;
                status = map_nt_error_from_unix_common(errno);
                DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
                        __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
                return ret;
        }

        return 0;
}
#endif
679 static int smb_direct_connection_post_io(struct smb_direct_connection *c)
680 {
681         struct smb_direct_io *io = NULL;
682         int ret;
683         bool need_keepalive = false;
684
685 DEBUG(0,("%s:%s: IO "
686         "s2r posted[%p] ready[%p] idle[%p] in[%p] remaining[%u] "
687         "r2s posted[%p] ready[%p] idle[%p] out[%p] remaining[%u]\n",
688         __location__, __func__,
689         c->s2r.posted, c->s2r.ready, c->s2r.idle, c->s2r.in, c->s2r.remaining_length,
690         c->r2s.posted, c->r2s.ready, c->r2s.idle, c->r2s.out, c->r2s.remaining_length));
691
692         errno = 0;
693         ret = smb_direct_connection_post_recv(c);
694         if (ret != 0) {
695                 NTSTATUS status;
696                 status = map_nt_error_from_unix_common(errno);
697                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
698                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
699                 return ret;
700         }
701
702         if (c->state.send_immediate) {
703                 need_keepalive = true;
704         }
705
706         if (need_keepalive && c->s2r.posted != NULL) {
707 DEBUG(0,("%s:%s: KEEP skip...(posted)\n", __location__, __func__));
708                 need_keepalive = false;
709         }
710
711         if (c->state.receive_credits == 0) {
712                 need_keepalive = true;
713         }
714
715         if (need_keepalive && c->state.receive_posted == 0) {
716 DEBUG(0,("%s:%s: KEEP skip...(no RECV posted)\n", __location__, __func__));
717                 need_keepalive = false;
718         }
719
720         if (need_keepalive && c->s2r.ready != NULL) {
721 DEBUG(0,("%s:%s: KEEP skip...(ready)\n", __location__, __func__));
722                 need_keepalive = false;
723         }
724
725         if (need_keepalive && c->s2r.idle == NULL) {
726 DEBUG(0,("%s:%s: KEEP skip...(no idle)\n", __location__, __func__));
727                 need_keepalive = false;
728         }
729
730 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
731         if (need_keepalive) {
732                 io = c->s2r.idle;
733                 DLIST_REMOVE(c->s2r.idle, io);
734
735                 io->data_length = 0;
736                 io->remaining_length = 0;
737
738                 DLIST_ADD_END(c->s2r.ready, io);
739         }
740
741 //smb_direct_connection_debug_credits(c, "POST_KEEP", io, __location__, __func__);
742         ret = smb_direct_connection_post_send(c);
743         if (ret != 0) {
744                 NTSTATUS status;
745                 status = map_nt_error_from_unix_common(errno);
746                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
747                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
748                 return ret;
749         }
750 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
751
752         if (c->state.send_immediate) {
753                 if (c->s2r.posted != NULL) {
754                         c->state.send_immediate = false;
755                 }
756         }
757
758         return 0;
759 }
760 static int smb_direct_connection_setup_readv(struct smb_direct_connection *c)
761 {
762 DEBUG(0,("%s:%s: TEVENT_FD_READABLE on\n", __location__, __func__));
763         TEVENT_FD_READABLE(c->sock.fde);
764         // TODO: immediate_event?? may skips a syscall.
765         return 0;
766 }
767
768 static int smb_direct_connection_setup_writev(struct smb_direct_connection *c)
769 {
770 DEBUG(0,("%s:%s: TEVENT_FD_WRITEABLE on\n", __location__, __func__));
771         TEVENT_FD_WRITEABLE(c->sock.fde);
772         // TODO: immediate_event?? may skips a syscall.
773         return 0;
774 }
775
/*
 * Per-request state of smb_direct_connection_rdma_connect_send().
 * Its destructor removes the CM channel fd handler from the connection.
 */
struct smb_direct_connection_rdma_connect_state {
        /* the connection being established */
        struct smb_direct_connection *c;
};
779
780 static int smb_direct_connection_rdma_connect_state_destructor(
781                struct smb_direct_connection_rdma_connect_state *state)
782 {
783         struct smb_direct_connection *c = state->c;
784
785         TALLOC_FREE(c->rdma.fde_channel);
786
787         return 0;
788 }
789
790 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
791                                             struct tevent_fd *fde,
792                                             uint16_t flags,
793                                             void *private_data);
794
795 static struct tevent_req *smb_direct_connection_rdma_connect_send(TALLOC_CTX *mem_ctx,
796                                         struct tevent_context *ev,
797                                         struct smb_direct_connection *c,
798                                         const struct sockaddr_storage *src,
799                                         const struct sockaddr_storage *dst,
800                                         struct tsocket_address *local_addr,
801                                         struct tsocket_address *remote_addr)
802 {
803         struct tevent_req *req;
804         struct smb_direct_connection_rdma_connect_state *state;
805         int ret;
806         //struct sockaddr *src_addr = (const struct sockaddr *)src;
807         struct sockaddr *src_addr = NULL;
808         struct sockaddr_storage _dst_addr = *dst;
809         struct sockaddr *dst_addr = (struct sockaddr *)&_dst_addr;
810
811         set_sockaddr_port(dst_addr, 5445);
812
813         req = tevent_req_create(mem_ctx, &state,
814                                 struct smb_direct_connection_rdma_connect_state);
815         if (req == NULL) {
816                 return NULL;
817         }
818         state->c = c;
819
820         DEBUG(0,("%s:%s: here...\n", __location__, __func__));
821         talloc_set_destructor(state, smb_direct_connection_rdma_connect_state_destructor);
822
823 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
824         ret = rdma_create_id(c->rdma.cm_channel,
825                              &c->rdma.cm_id,
826                              &c->rdma.context,
827                              RDMA_PS_TCP);
828 #else
829 #error
830         ret = rdma_create_id(c->rdma.cm_channel,
831                              &c->rdma.cm_id,
832                              &c->rdma.context);
833 #endif
834         if (ret != 0) {
835 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
836                 tevent_req_oom(req);
837                 return tevent_req_post(req, ev);
838         }
839
840         c->rdma.fde_channel = tevent_add_fd(ev, state,
841                                         c->rdma.cm_channel->fd,
842                                         TEVENT_FD_READ,
843                                         smb_direct_connection_rdma_connect_handler,
844                                         req);
845         if (tevent_req_nomem(c->rdma.fde_channel, req)) {
846                 return tevent_req_post(req, ev);
847         }
848
849         errno = 0;
850         ret = rdma_resolve_addr(c->rdma.cm_id,
851                                 src_addr, dst_addr,
852                                 5000);
853         if (ret != 0) {
854                 NTSTATUS status = map_nt_error_from_unix_common(errno);
855                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
856                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
857                 tevent_req_nterror(req, status);
858                 return tevent_req_post(req, ev);
859         }
860         c->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
861
862         return req;
863 }
864
865 static void smb_direct_connection_rdma_connect_handler(struct tevent_context *ev,
866                                             struct tevent_fd *fde,
867                                             uint16_t flags,
868                                             void *private_data)
869 {
870         struct tevent_req *req =
871                 talloc_get_type_abort(private_data,
872                 struct tevent_req);
873         struct smb_direct_connection_rdma_connect_state *state =
874                 tevent_req_data(req,
875                 struct smb_direct_connection_rdma_connect_state);
876         struct smb_direct_connection *c = state->c;
877         struct rdma_conn_param conn_param;
878         uint8_t ird_ord_hdr[8];
879         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
880         int ret;
881
882         errno = 0;
883         ret = rdma_get_cm_event(c->rdma.cm_channel,
884                                 &c->rdma.cm_event);
885         if (ret != 0) {
886                 status = map_nt_error_from_unix_common(errno);
887                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
888                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
889                 tevent_req_nterror(req, status);
890                 return;
891         }
892
893         errno = 0;
894         if (c->rdma.cm_event->status != 0) {
895                 errno = c->rdma.cm_event->status;
896                 status = map_nt_error_from_unix_common(errno);
897                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
898                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
899                 tevent_req_nterror(req, status);
900                 return;
901         }
902
903         if (c->rdma.cm_event->event != c->rdma.expected_event) {
904                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
905                         __location__, __FUNCTION__, ret, errno));
906
907         }
908
909         switch (c->rdma.cm_event->event) {
910         case RDMA_CM_EVENT_ADDR_RESOLVED:
911         errno = 0;
912                 ret = rdma_resolve_route(c->rdma.cm_id, 5000);
913                 if (ret != 0) {
914                         status = map_nt_error_from_unix_common(errno);
915                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
916                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
917                         tevent_req_nterror(req, status);
918                         return;
919                 }
920                 c->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
921                 break;
922         case RDMA_CM_EVENT_ROUTE_RESOLVED:
923         errno = 0;
924         ret = 0;
925                 status = smb_direct_connection_complete_alloc(c);
926                 if (!NT_STATUS_IS_OK(status)) {
927                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
928                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
929                         tevent_req_nterror(req, status);
930                         return;
931                 }
932
933                 RSIVAL(ird_ord_hdr, 0, 16);
934                 RSIVAL(ird_ord_hdr, 4, 0);
935
936                 ZERO_STRUCT(conn_param);
937                 conn_param.private_data = ird_ord_hdr;
938                 conn_param.private_data_len = sizeof(ird_ord_hdr);
939                 conn_param.responder_resources = 1;
940                 conn_param.initiator_depth = 1;
941                 conn_param.retry_count = 10;
942
943                 errno = 0;
944                 ret = rdma_connect(c->rdma.cm_id, &conn_param);
945                 if (ret != 0) {
946                         status = map_nt_error_from_unix_common(errno);
947                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
948                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
949                         tevent_req_nterror(req, status);
950                         return;
951                 }
952                 c->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
953                 break;
954
955         case RDMA_CM_EVENT_ESTABLISHED:
956                 errno = 0;
957                 ret = 0;
958                 //DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
959                 //      __location__, __FUNCTION__, ret, errno));
960
961                 c->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED;
962                 TALLOC_FREE(c->rdma.fde_channel);
963                 rdma_ack_cm_event(c->rdma.cm_event);
964                 c->rdma.cm_event = NULL;
965                 tevent_req_done(req);
966                 return;
967
968         case RDMA_CM_EVENT_ADDR_ERROR:
969         case RDMA_CM_EVENT_ROUTE_ERROR:
970         case RDMA_CM_EVENT_CONNECT_REQUEST:
971         case RDMA_CM_EVENT_CONNECT_RESPONSE:
972         case RDMA_CM_EVENT_CONNECT_ERROR:
973         case RDMA_CM_EVENT_UNREACHABLE:
974         case RDMA_CM_EVENT_REJECTED:
975         case RDMA_CM_EVENT_DISCONNECTED:
976         case RDMA_CM_EVENT_DEVICE_REMOVAL:
977         case RDMA_CM_EVENT_MULTICAST_JOIN:
978         case RDMA_CM_EVENT_MULTICAST_ERROR:
979         case RDMA_CM_EVENT_ADDR_CHANGE:
980         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
981                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
982                 DEBUG(0,("%s:%s: event[%d] ret[%d] errno[%d] status[%s]\n",
983                         __location__, __FUNCTION__,
984                         c->rdma.cm_event->event, ret, errno, nt_errstr(status)));
985                 tevent_req_nterror(req, status);
986                 return;
987         }
988
989         rdma_ack_cm_event(c->rdma.cm_event);
990         c->rdma.cm_event = NULL;
991 }
992
993 static NTSTATUS smb_direct_connection_rdma_connect_recv(struct tevent_req *req)
994 {
995 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
996         return tevent_req_simple_recv_ntstatus(req);
997 }
998
/*
 * Per-request state of the SMB-Direct negotiation
 * (smb_direct_connection_negotiate_connect_send/_recv).
 *
 * The work requests and buffers used during the negotiation are
 * struct smb_direct_io objects obtained from smb_direct_io_create(),
 * so nothing but the connection needs to be remembered here.
 */
struct smb_direct_connection_negotiate_connect_state {
        /*
         * Connection being negotiated.  The state destructor only
         * removes the fd events stored on it, it does not free it.
         */
        struct smb_direct_connection *c;
};
1020
1021 static int smb_direct_connection_negotiate_connect_destructor(
1022                struct smb_direct_connection_negotiate_connect_state *state)
1023 {
1024         struct smb_direct_connection *c = state->c;
1025
1026         TALLOC_FREE(c->ibv.fde_channel);
1027         TALLOC_FREE(c->rdma.fde_channel);
1028
1029         return 0;
1030 }
1031
1032 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
1033                                               struct tevent_fd *fde,
1034                                               uint16_t flags,
1035                                               void *private_data);
1036 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
1037                                              struct tevent_fd *fde,
1038                                              uint16_t flags,
1039                                              void *private_data);
1040
1041 static struct tevent_req *smb_direct_connection_negotiate_connect_send(TALLOC_CTX *mem_ctx,
1042                                                      struct tevent_context *ev,
1043                                                      struct smb_direct_connection *c)
1044 {
1045         struct tevent_req *req;
1046         struct smb_direct_connection_negotiate_connect_state *state;
1047         struct smb_direct_io *rdma_read = NULL;
1048         struct ibv_send_wr *bad_send_wr = NULL;
1049         NTSTATUS status;
1050         int ret;
1051
1052         req = tevent_req_create(mem_ctx, &state,
1053                                 struct smb_direct_connection_negotiate_connect_state);
1054         if (req == NULL) {
1055                 return NULL;
1056         }
1057         state->c = c;
1058
1059 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1060         // TODO: cleanup
1061         talloc_set_destructor(state, smb_direct_connection_negotiate_connect_destructor);
1062
1063         c->rdma.fde_channel = tevent_add_fd(ev, state,
1064                                             c->rdma.cm_channel->fd,
1065                                             TEVENT_FD_READ,
1066                                             smb_direct_connection_negotiate_connect_rdma_handler,
1067                                             req);
1068         if (tevent_req_nomem(c->rdma.fde_channel, req)) {
1069                 return tevent_req_post(req, ev);
1070         }
1071         c->ibv.fde_channel = tevent_add_fd(ev, state,
1072                                            c->ibv.comp_channel->fd,
1073                                            TEVENT_FD_READ,
1074                                            smb_direct_connection_negotiate_connect_ibv_handler,
1075                                            req);
1076         if (tevent_req_nomem(c->ibv.fde_channel, req)) {
1077                 return tevent_req_post(req, ev);
1078         }
1079
1080         rdma_read = smb_direct_io_create(c);
1081         if (tevent_req_nomem(rdma_read, req)) {
1082                 return tevent_req_post(req, ev);
1083         }
1084         rdma_read->sge[0].addr = 1;
1085         rdma_read->sge[0].length = 0;
1086         rdma_read->sge[0].lkey = 1;
1087         rdma_read->send_wr.opcode = IBV_WR_RDMA_READ;
1088         rdma_read->send_wr.send_flags = IBV_SEND_SIGNALED;
1089         rdma_read->send_wr.sg_list = rdma_read->sge;
1090         rdma_read->send_wr.num_sge = 1;
1091         rdma_read->send_wr.wr.rdma.rkey = 1;
1092         rdma_read->send_wr.wr.rdma.remote_addr = 1;
1093
1094         errno = 0;
1095         ret = ibv_post_send(c->ibv.qp, &rdma_read->send_wr, &bad_send_wr);
1096         if (ret != 0) {
1097                 status = map_nt_error_from_unix_common(errno);
1098                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1099                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1100                 tevent_req_nterror(req, status);
1101                 return tevent_req_post(req, ev);
1102         }
1103
1104                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1105                         __location__, __FUNCTION__, ret, errno));
1106         return req;
1107 }
1108
1109 static void smb_direct_connection_negotiate_connect_rdma_handler(struct tevent_context *ev,
1110                                               struct tevent_fd *fde,
1111                                               uint16_t flags,
1112                                               void *private_data)
1113 {
1114         struct tevent_req *req =
1115                 talloc_get_type_abort(private_data,
1116                 struct tevent_req);
1117         struct smb_direct_connection_negotiate_connect_state *state =
1118                 tevent_req_data(req,
1119                 struct smb_direct_connection_negotiate_connect_state);
1120         struct smb_direct_connection *c = state->c;
1121         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1122         int ret;
1123
1124         errno = 0;
1125         ret = rdma_get_cm_event(c->rdma.cm_channel,
1126                                 &c->rdma.cm_event);
1127         if (ret != 0) {
1128                 status = map_nt_error_from_unix_common(errno);
1129                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1130                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1131                 tevent_req_nterror(req, status);
1132                 return;
1133         }
1134
1135         if (c->rdma.cm_event->status != 0) {
1136                 errno = c->rdma.cm_event->status;
1137                 status = map_nt_error_from_unix_common(errno);
1138                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1139                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1140                 tevent_req_nterror(req, status);
1141                 return;
1142         }
1143
1144         switch (c->rdma.cm_event->event) {
1145         case RDMA_CM_EVENT_DISCONNECTED:
1146                 status = NT_STATUS_CONNECTION_DISCONNECTED;
1147                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1148                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1149                 tevent_req_nterror(req, status);
1150                 return;
1151         case RDMA_CM_EVENT_ADDR_RESOLVED:
1152         case RDMA_CM_EVENT_ADDR_ERROR:
1153         case RDMA_CM_EVENT_ROUTE_RESOLVED:
1154         case RDMA_CM_EVENT_ESTABLISHED:
1155         case RDMA_CM_EVENT_ROUTE_ERROR:
1156         case RDMA_CM_EVENT_CONNECT_REQUEST:
1157         case RDMA_CM_EVENT_CONNECT_RESPONSE:
1158         case RDMA_CM_EVENT_CONNECT_ERROR:
1159         case RDMA_CM_EVENT_UNREACHABLE:
1160         case RDMA_CM_EVENT_REJECTED:
1161         case RDMA_CM_EVENT_DEVICE_REMOVAL:
1162         case RDMA_CM_EVENT_MULTICAST_JOIN:
1163         case RDMA_CM_EVENT_MULTICAST_ERROR:
1164         case RDMA_CM_EVENT_ADDR_CHANGE:
1165         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1166                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1167                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1168                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1169                 tevent_req_nterror(req, status);
1170                 return;
1171         }
1172
1173         status = NT_STATUS_INTERNAL_ERROR;
1174         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1175                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1176         tevent_req_nterror(req, status);
1177 }
1178
1179 static void smb_direct_connection_negotiate_connect_ibv_handler(struct tevent_context *ev,
1180                                              struct tevent_fd *fde,
1181                                              uint16_t flags,
1182                                              void *private_data)
1183 {
1184         struct tevent_req *req =
1185                 talloc_get_type_abort(private_data,
1186                 struct tevent_req);
1187         struct smb_direct_connection_negotiate_connect_state *state =
1188                 tevent_req_data(req,
1189                 struct smb_direct_connection_negotiate_connect_state);
1190         struct smb_direct_connection *c = state->c;
1191         struct ibv_cq *cq = NULL;
1192         void *cq_context = NULL;
1193         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1194         struct ibv_wc wc;
1195         int ret;
1196         uint16_t credits_requested;
1197         uint16_t credits_granted;
1198         uint32_t max_read_write_size;
1199         uint32_t preferred_send_size;
1200         uint32_t max_receive_size;
1201         uint32_t max_fragmented_size;
1202         uint32_t tmp;
1203         struct smb_direct_io *io = NULL;
1204         struct smb_direct_io *neg_send = NULL;
1205         struct smb_direct_io *neg_recv = NULL;
1206         struct ibv_recv_wr *bad_recv_wr = NULL;
1207         struct ibv_send_wr *bad_send_wr = NULL;
1208
1209         errno = 0;
1210         ret = ibv_get_cq_event(c->ibv.comp_channel,
1211                                &cq, &cq_context);
1212         if (ret != 0) {
1213                 status = map_nt_error_from_unix_common(errno);
1214                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1215                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1216                 tevent_req_nterror(req, status);
1217                 return;
1218         }
1219
1220         ibv_ack_cq_events(cq, 1);
1221
1222         if (cq_context != c) {
1223                 status = NT_STATUS_INTERNAL_ERROR;
1224                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1225                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1226                 tevent_req_nterror(req, status);
1227                 return;
1228         }
1229
1230         errno = 0;
1231         ret = ibv_req_notify_cq(cq, 0);
1232         if (ret != 0) {
1233                 status = map_nt_error_from_unix_common(errno);
1234                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1235                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1236                 tevent_req_nterror(req, status);
1237                 return;
1238         }
1239
1240         errno = 0;
1241         ZERO_STRUCT(wc);
1242         ret = ibv_poll_cq(cq, 1, &wc);
1243         if (ret != 1) {
1244                 status = map_nt_error_from_unix_common(errno);
1245                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1246                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1247                 tevent_req_nterror(req, status);
1248                 return;
1249         }
1250         ret = 0;
1251
1252         if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1253                 //errno = wc.status;
1254                 status = map_nt_error_from_unix_common(wc.status);//errno);
1255                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1256                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1257                 TALLOC_FREE(c->ibv.fde_channel);
1258                 TALLOC_FREE(c->rdma.fde_channel);
1259                 smb_direct_connection_negotiate_connect_rdma_handler(ev, fde, flags, private_data);
1260                 return;
1261         }
1262         if (wc.status != IBV_WC_SUCCESS) {
1263                 errno = wc.status;
1264                 status = map_nt_error_from_unix_common(errno);
1265                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1266                         __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1267                         ibv_wc_status_str(wc.status)));
1268                 tevent_req_nterror(req, status);
1269                 return;
1270         }
1271
1272         io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1273                                    struct smb_direct_io);
1274
1275         switch (wc.opcode) {
1276         case IBV_WC_RDMA_READ:
1277                 DEBUG(0,("%s:%s: GOT RDMA_READ[%p] next[%p] ret[%d] errno[%d]\n",
1278                         __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1279                 TALLOC_FREE(io);
1280
1281                 neg_recv = smb_direct_io_create(c);
1282                 if (tevent_req_nomem(neg_recv, req)) {
1283                         return;
1284                 }
1285                 //neg_recv->sge[0].addr = (uint64_t) (uintptr_t) neg_recv->data;
1286                 //neg_recv->sge[0].length = sizeof(neg_recv->data);
1287                 //neg_recv->sge[0].lkey = neg_recv->data_mr->lkey;
1288                 neg_recv->recv_wr.sg_list = &neg_recv->sge[1];
1289                 neg_recv->recv_wr.num_sge = 1;
1290
1291                 neg_send = smb_direct_io_create(c);
1292                 if (tevent_req_nomem(neg_send, req)) {
1293                         return;
1294                 }
1295                 SSVAL(neg_send->data, 0x00, 0x0100);
1296                 SSVAL(neg_send->data, 0x02, 0x0100);
1297                 SSVAL(neg_send->data, 0x04, 0x0000);
1298                 SSVAL(neg_send->data, 0x06, c->state.send_credit_target);
1299                 SIVAL(neg_send->data, 0x08, c->state.max_send_size);
1300                 SIVAL(neg_send->data, 0x0C, c->state.max_receive_size);
1301                 SIVAL(neg_send->data, 0x10, c->state.max_fragmented_size);
1302                 //neg_send->sge[0].addr = (uint64_t) (uintptr_t) neg_send->data;
1303                 neg_send->sge[1].length = 0x14;
1304                 //neg_send->sge[0].lkey = neg_send->data_mr->lkey;
1305                 //neg_send->send_wr.opcode = IBV_WR_SEND;
1306                 //neg_send->send_wr.send_flags = IBV_SEND_SIGNALED;
1307                 neg_send->send_wr.sg_list = &neg_send->sge[1];
1308                 neg_send->send_wr.num_sge = 1;
1309
1310                 errno = 0;
1311                 ret = ibv_post_recv(c->ibv.qp, &neg_recv->recv_wr, &bad_recv_wr);
1312                 if (ret != 0) {
1313                         status = map_nt_error_from_unix_common(errno);
1314                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1315                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1316                         tevent_req_nterror(req, status);
1317                         return;
1318                 }
1319
1320                 errno = 0;
1321                 ret = ibv_post_send(c->ibv.qp, &neg_send->send_wr, &bad_send_wr);
1322                 if (ret != 0) {
1323                         status = map_nt_error_from_unix_common(errno);
1324                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1325                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1326                         tevent_req_nterror(req, status);
1327                         return;
1328                 }
1329
1330                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1331                         __location__, __FUNCTION__, ret, errno));
1332                 break;
1333
1334         case IBV_WC_SEND:
1335                 DEBUG(0,("%s:%s: GOT SEND[%p] next[%p] ret[%d] errno[%d]\n",
1336                         __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1337                 TALLOC_FREE(io);
1338                 break;
1339         case IBV_WC_RECV:
1340                 DEBUG(0,("%s:%s: GOT RECV[%p] next[%p] ret[%d] errno[%d]\n",
1341                         __location__, __FUNCTION__, io, io->recv_wr.next, ret, errno));
1342                 //dump_data(0, io->data, wc.byte_len);
1343                 if (wc.byte_len < 0x20) {
1344                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1345                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1346                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1347                         tevent_req_nterror(req, status);
1348                         return;
1349                 }
1350                 if (SVAL(io->data, 0x00) != 0x0100) {
1351                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1352                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1353                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1354                         tevent_req_nterror(req, status);
1355                         return;
1356                 }
1357                 if (SVAL(io->data, 0x02) != 0x0100) {
1358                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1359                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1360                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1361                         tevent_req_nterror(req, status);
1362                         return;
1363                 }
1364                 if (SVAL(io->data, 0x04) != 0x0100) {
1365                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1366                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1367                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1368                         tevent_req_nterror(req, status);
1369                         return;
1370                 }
1371                 credits_requested = SVAL(io->data, 0x08);
1372                 if (credits_requested == 0) {
1373                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1374                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1375                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1376                         tevent_req_nterror(req, status);
1377                         return;
1378                 }
1379                 credits_granted = SVAL(io->data, 0x0A);
1380                 if (credits_granted == 0) {
1381                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1382                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1383                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1384                         tevent_req_nterror(req, status);
1385                         return;
1386                 }
1387                 status = NT_STATUS(IVAL(io->data, 0x0C));
1388                 if (!NT_STATUS_IS_OK(status)) {
1389                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1390                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1391                         tevent_req_nterror(req, status);
1392                         return;
1393                 }
1394                 max_read_write_size = IVAL(io->data, 0x10);
1395                 preferred_send_size = IVAL(io->data, 0x14);
1396                 if (preferred_send_size > c->state.max_receive_size) {
1397                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1398                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1399                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1400                         tevent_req_nterror(req, status);
1401                         return;
1402                 }
1403                 max_receive_size = IVAL(io->data, 0x18);
1404                 if (max_receive_size < 0x80) {
1405                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1406                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1407                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1408                         tevent_req_nterror(req, status);
1409                         return;
1410                 }
1411                 max_fragmented_size = IVAL(io->data, 0x1C);
1412                 if (max_fragmented_size < 0x20000) {
1413                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1414                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1415                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1416                         tevent_req_nterror(req, status);
1417                         return;
1418                 }
1419
1420                 c->state.receive_credit_target = credits_requested;
1421
1422                 tmp = c->state.max_receive_size;
1423                 tmp = MIN(tmp, preferred_send_size);
1424                 tmp = MAX(tmp, 128);
1425                 c->state.max_receive_size = tmp;
1426
1427                 tmp = c->state.max_send_size;
1428                 tmp = MIN(tmp, max_receive_size);
1429                 c->state.max_send_size = tmp;
1430
1431                 tmp = MIN(1048576, max_read_write_size);
1432                 c->state.max_read_write_size = tmp;
1433
1434                 tmp = c->state.max_fragmented_size;
1435                 tmp = MIN(tmp, max_fragmented_size);
1436                 c->state.max_fragmented_size = tmp;
1437
1438                 c->state.send_credits = credits_granted;
1439
1440                 TALLOC_FREE(c->ibv.fde_channel);
1441                 TALLOC_FREE(c->rdma.fde_channel);
1442
1443                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1444                         __location__, __FUNCTION__, ret, errno));
1445
1446                 TALLOC_FREE(io);
1447
1448                 errno = 0;
1449                 ret = smb_direct_connection_post_io(c);
1450                 if (ret != 0) {
1451                         status = map_nt_error_from_unix_common(errno);
1452                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1453                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1454                         tevent_req_nterror(req, status);
1455                         return;
1456                 }
1457
1458                 tevent_req_done(req);
1459                 return;
1460
1461         case IBV_WC_RDMA_WRITE:
1462         default:
1463                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1464                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1465                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1466                 tevent_req_nterror(req, status);
1467                 return;
1468         }
1469 }
1470
1471 static NTSTATUS smb_direct_connection_negotiate_connect_recv(struct tevent_req *req)
1472 {
1473 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1474         return tevent_req_simple_recv_ntstatus(req);
1475 }
1476
/*
 * State of smb_direct_connection_connect_send(): everything the
 * completion callbacks need to start the next sub-request.
 */
struct smb_direct_connection_connect_state {
        /* event context the sub-requests are run on */
        struct tevent_context *ev;
        /* connection that is being connected and negotiated */
        struct smb_direct_connection *c;
};
1481
1482 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq);
1483 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq);
1484
1485 struct tevent_req *smb_direct_connection_connect_send(TALLOC_CTX *mem_ctx,
1486                                                       struct tevent_context *ev,
1487                                                       struct smb_direct_connection *c,
1488                                                       const struct sockaddr_storage *src,
1489                                                       const struct sockaddr_storage *dst)
1490 {
1491         struct tevent_req *req = NULL;
1492         struct smb_direct_connection_connect_state *state = NULL;
1493         struct tevent_req *subreq = NULL;
1494
1495         req = tevent_req_create(mem_ctx, &state,
1496                                 struct smb_direct_connection_connect_state);
1497         if (req == NULL) {
1498                 return NULL;
1499         }
1500         state->ev = ev;
1501         state->c = c;
1502
1503 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1504         subreq = smb_direct_connection_rdma_connect_send(state, ev, c, src, dst, NULL, NULL);
1505         if (tevent_req_nomem(subreq, req)) {
1506                 return tevent_req_post(req, ev);
1507         }
1508         tevent_req_set_callback(subreq,
1509                                 smb_direct_connection_connect_done_rdma,
1510                                 req);
1511
1512 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1513         return req;
1514 }
1515
1516 static void smb_direct_connection_connect_done_rdma(struct tevent_req *subreq)
1517 {
1518         struct tevent_req *req =
1519                 tevent_req_callback_data(subreq,
1520                 struct tevent_req);
1521         struct smb_direct_connection_connect_state *state =
1522                 tevent_req_data(req,
1523                 struct smb_direct_connection_connect_state);
1524         NTSTATUS status;
1525
1526 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1527         status = smb_direct_connection_rdma_connect_recv(subreq);
1528         TALLOC_FREE(subreq);
1529         if (tevent_req_nterror(req, status)) {
1530                 return;
1531         }
1532
1533 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1534         subreq = smb_direct_connection_negotiate_connect_send(state, state->ev, state->c);
1535         if (tevent_req_nomem(subreq, req)) {
1536                 return;
1537         }
1538         tevent_req_set_callback(subreq,
1539                                 smb_direct_connection_connect_done_negotiate,
1540                                 req);
1541 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1542 }
1543
1544 static void smb_direct_connection_connect_done_negotiate(struct tevent_req *subreq)
1545 {
1546         struct tevent_req *req =
1547                 tevent_req_callback_data(subreq,
1548                 struct tevent_req);
1549         NTSTATUS status;
1550
1551 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1552         status = smb_direct_connection_negotiate_connect_recv(subreq);
1553         TALLOC_FREE(subreq);
1554         if (tevent_req_nterror(req, status)) {
1555                 return;
1556         }
1557
1558 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1559         tevent_req_done(req);
1560 }
1561
1562 NTSTATUS smb_direct_connection_connect_recv(struct tevent_req *req, int *fd)
1563 {
1564         struct smb_direct_connection_connect_state *state =
1565                 tevent_req_data(req,
1566                 struct smb_direct_connection_connect_state);
1567         struct smb_direct_connection *c = state->c;
1568         NTSTATUS status;
1569
1570         *fd = -1;
1571
1572         if (tevent_req_is_nterror(req, &status)) {
1573 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1574                 tevent_req_received(req);
1575                 return status;
1576         }
1577
1578 DEBUG(0,("%s:%s: sock.fd[%d] sock.tmp_fd[%d]\n",
1579         __location__, __func__, c->sock.fd, c->sock.tmp_fd));
1580         *fd = c->sock.tmp_fd;
1581         c->sock.tmp_fd = -1;
1582         tevent_req_received(req);
1583         return NT_STATUS_OK;
1584 }
1585
1586 struct smb_direct_connection_negotiate_accept_state {
1587         struct smb_direct_connection *c;
1588 };
1589
1590 static int smb_direct_connection_negotiate_accept_destructor(
1591                struct smb_direct_connection_negotiate_accept_state *state)
1592 {
1593         struct smb_direct_connection *c = state->c;
1594
1595         TALLOC_FREE(c->ibv.fde_channel);
1596         TALLOC_FREE(c->rdma.fde_channel);
1597
1598         return 0;
1599 }
1600
1601 static void smb_direct_connection_negotiate_accept_rdma_handler(struct tevent_context *ev,
1602                                               struct tevent_fd *fde,
1603                                               uint16_t flags,
1604                                               void *private_data);
1605 static void smb_direct_connection_negotiate_accept_ibv_handler(struct tevent_context *ev,
1606                                              struct tevent_fd *fde,
1607                                              uint16_t flags,
1608                                              void *private_data);
1609
1610 static struct tevent_req *smb_direct_connection_negotiate_accept_send(
1611         TALLOC_CTX *mem_ctx,
1612         struct tevent_context *ev,
1613         struct smb_direct_connection **_c)
1614 {
1615         struct tevent_req *req = NULL;
1616         struct smb_direct_connection_negotiate_accept_state *state = NULL;
1617         struct smb_direct_connection *c = NULL;
1618         struct smb_direct_io *neg_recv = NULL;
1619         struct ibv_recv_wr *bad_recv_wr = NULL;
1620         NTSTATUS status;
1621         int ret;
1622
1623         req = tevent_req_create(mem_ctx, &state,
1624                                 struct smb_direct_connection_negotiate_accept_state);
1625         if (req == NULL) {
1626                 return NULL;
1627         }
1628         c = talloc_move(state, _c);
1629         state->c = c;
1630         talloc_set_destructor(state, smb_direct_connection_negotiate_accept_destructor);
1631
1632         c->rdma.fde_channel = tevent_add_fd(ev, state,
1633                                             c->rdma.cm_channel->fd,
1634                                             TEVENT_FD_READ,
1635                                             smb_direct_connection_negotiate_accept_rdma_handler,
1636                                             req);
1637         if (tevent_req_nomem(c->rdma.fde_channel, req)) {
1638                 return tevent_req_post(req, ev);
1639         }
1640         c->ibv.fde_channel = tevent_add_fd(ev, state,
1641                                            c->ibv.comp_channel->fd,
1642                                            TEVENT_FD_READ,
1643                                            smb_direct_connection_negotiate_accept_ibv_handler,
1644                                            req);
1645         if (tevent_req_nomem(c->ibv.fde_channel, req)) {
1646                 return tevent_req_post(req, ev);
1647         }
1648
1649         neg_recv = smb_direct_io_create(c);
1650         if (tevent_req_nomem(neg_recv, req)) {
1651                 return tevent_req_post(req, ev);
1652         }
1653         //neg_recv->sge[0].addr = (uint64_t) (uintptr_t) neg_recv->data;
1654         neg_recv->sge[1].length = sizeof(neg_recv->data);
1655         //neg_recv->sge[0].lkey = neg_recv->data_mr->lkey;
1656         neg_recv->recv_wr.sg_list = &neg_recv->sge[1];
1657         neg_recv->recv_wr.num_sge = 1;
1658
1659         ret = ibv_post_recv(c->ibv.qp, &neg_recv->recv_wr, &bad_recv_wr);
1660         if (ret != 0) {
1661                 status = map_nt_error_from_unix_common(errno);
1662                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1663                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1664                 tevent_req_nterror(req, status);
1665                 return tevent_req_post(req, ev);
1666         }
1667
1668         ret = rdma_accept(c->rdma.cm_id, &c->rdma.conn_param);
1669         if (ret != 0) {
1670                 DBG_ERR("rdma_accept failed [%s] result [%d]\n", strerror(errno), ret);
1671                 status = map_nt_error_from_unix_common(errno);
1672                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1673                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1674                 tevent_req_nterror(req, status);
1675                 return tevent_req_post(req, ev);
1676         }
1677
1678         return req;
1679 }
1680
1681 static void smb_direct_connection_negotiate_accept_rdma_handler(
1682         struct tevent_context *ev,
1683         struct tevent_fd *fde,
1684         uint16_t flags,
1685         void *private_data)
1686 {
1687         struct tevent_req *req =
1688                 talloc_get_type_abort(private_data,
1689                 struct tevent_req);
1690         struct smb_direct_connection_negotiate_accept_state *state =
1691                 tevent_req_data(req,
1692                 struct smb_direct_connection_negotiate_accept_state);
1693         struct smb_direct_connection *c = state->c;
1694         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1695         int ret;
1696
1697 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1698
1699         errno = 0;
1700         ret = rdma_get_cm_event(c->rdma.cm_channel,
1701                                 &c->rdma.cm_event);
1702         if (ret != 0) {
1703                 status = map_nt_error_from_unix_common(errno);
1704                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1705                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1706                 tevent_req_nterror(req, status);
1707                 return;
1708         }
1709
1710         if (c->rdma.cm_event->status != 0) {
1711                 errno = c->rdma.cm_event->status;
1712                 status = map_nt_error_from_unix_common(errno);
1713                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1714                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1715                 tevent_req_nterror(req, status);
1716                 return;
1717         }
1718
1719         switch (c->rdma.cm_event->event) {
1720         case RDMA_CM_EVENT_ESTABLISHED:
1721                 DEBUG(0,("RDMA conn established [%p]\n", c));
1722                 rdma_ack_cm_event(c->rdma.cm_event);
1723                 c->rdma.cm_event = NULL;
1724                 return;
1725         case RDMA_CM_EVENT_DISCONNECTED:
1726                 status = NT_STATUS_CONNECTION_DISCONNECTED;
1727                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1728                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1729                 tevent_req_nterror(req, status);
1730                 return;
1731         case RDMA_CM_EVENT_ADDR_RESOLVED:
1732         case RDMA_CM_EVENT_ADDR_ERROR:
1733         case RDMA_CM_EVENT_ROUTE_RESOLVED:
1734         case RDMA_CM_EVENT_ROUTE_ERROR:
1735         case RDMA_CM_EVENT_CONNECT_REQUEST:
1736         case RDMA_CM_EVENT_CONNECT_RESPONSE:
1737         case RDMA_CM_EVENT_CONNECT_ERROR:
1738         case RDMA_CM_EVENT_UNREACHABLE:
1739         case RDMA_CM_EVENT_REJECTED:
1740         case RDMA_CM_EVENT_DEVICE_REMOVAL:
1741         case RDMA_CM_EVENT_MULTICAST_JOIN:
1742         case RDMA_CM_EVENT_MULTICAST_ERROR:
1743         case RDMA_CM_EVENT_ADDR_CHANGE:
1744         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1745                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1746                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] event [%d] "
1747                          "RDMA_CM_EVENT_REJECTED [%d]\n",
1748                          __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1749                          c->rdma.cm_event->event, RDMA_CM_EVENT_REJECTED));
1750                 tevent_req_nterror(req, status);
1751                 return;
1752         }
1753
1754         status = NT_STATUS_INTERNAL_ERROR;
1755         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1756                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1757         tevent_req_nterror(req, status);
1758 }
1759
1760 static void smb_direct_connection_negotiate_accept_ibv_handler(
1761         struct tevent_context *ev,
1762         struct tevent_fd *fde,
1763         uint16_t flags,
1764         void *private_data)
1765 {
1766         struct tevent_req *req =
1767                 talloc_get_type_abort(private_data,
1768                 struct tevent_req);
1769         struct smb_direct_connection_negotiate_accept_state *state =
1770                 tevent_req_data(req,
1771                 struct smb_direct_connection_negotiate_accept_state);
1772         struct smb_direct_connection *c = state->c;
1773         struct ibv_cq *cq = NULL;
1774         void *cq_context = NULL;
1775         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
1776         struct ibv_wc wc;
1777         int ret;
1778         uint16_t credits_requested;
1779         uint16_t credits_granted;
1780         uint32_t preferred_send_size;
1781         uint32_t max_receive_size;
1782         uint32_t max_fragmented_size;
1783         uint32_t tmp;
1784         struct smb_direct_io *io = NULL;
1785         struct smb_direct_io *neg_send = NULL;
1786         struct ibv_send_wr *bad_send_wr = NULL;
1787
1788 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
1789
1790         errno = 0;
1791         ret = ibv_get_cq_event(c->ibv.comp_channel,
1792                                &cq, &cq_context);
1793         if (ret != 0) {
1794                 status = map_nt_error_from_unix_common(errno);
1795                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1796                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1797                 tevent_req_nterror(req, status);
1798                 return;
1799         }
1800
1801         ibv_ack_cq_events(cq, 1);
1802
1803         if (cq_context != c) {
1804                 status = NT_STATUS_INTERNAL_ERROR;
1805                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1806                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1807                 tevent_req_nterror(req, status);
1808                 return;
1809         }
1810
1811         errno = 0;
1812         ret = ibv_req_notify_cq(cq, 0);
1813         if (ret != 0) {
1814                 status = map_nt_error_from_unix_common(errno);
1815                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1816                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1817                 tevent_req_nterror(req, status);
1818                 return;
1819         }
1820
1821         errno = 0;
1822         ZERO_STRUCT(wc);
1823         ret = ibv_poll_cq(cq, 1, &wc);
1824         if (ret != 1) {
1825                 status = map_nt_error_from_unix_common(errno);
1826                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1827                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1828                 tevent_req_nterror(req, status);
1829                 return;
1830         }
1831         ret = 0;
1832
1833         if (wc.status == IBV_WC_WR_FLUSH_ERR) {
1834                 //errno = wc.status;
1835                 status = map_nt_error_from_unix_common(wc.status);//errno);
1836                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1837                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1838                 TALLOC_FREE(c->ibv.fde_channel);
1839                 TALLOC_FREE(c->rdma.fde_channel);
1840                 smb_direct_connection_negotiate_connect_rdma_handler(ev, fde, flags, private_data);
1841                 return;
1842         }
1843         if (wc.status != IBV_WC_SUCCESS) {
1844                 errno = wc.status;
1845                 status = map_nt_error_from_unix_common(errno);
1846                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
1847                         __location__, __FUNCTION__, ret, errno, nt_errstr(status),
1848                         ibv_wc_status_str(wc.status)));
1849                 tevent_req_nterror(req, status);
1850                 return;
1851         }
1852
1853         io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
1854                                    struct smb_direct_io);
1855
1856         switch (wc.opcode) {
1857         case IBV_WC_SEND:
1858                 DEBUG(0,("%s:%s: GOT SEND[%p] next[%p] ret[%d] errno[%d]\n",
1859                         __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1860                 TALLOC_FREE(io);
1861                 break;
1862         case IBV_WC_RDMA_READ:
1863                 DEBUG(0,("%s:%s: GOT RDMA_READ[%p] next[%p] ret[%d] errno[%d]\n",
1864                         __location__, __FUNCTION__, io, io->send_wr.next, ret, errno));
1865                 TALLOC_FREE(io);
1866                 break;
1867         case IBV_WC_RECV:
1868                 DEBUG(0,("%s:%s: GOT RECV[%p] next[%p] ret[%d] errno[%d]\n",
1869                         __location__, __FUNCTION__, io, io->recv_wr.next, ret, errno));
1870                 //dump_data(0, io->data, wc.byte_len);
1871                 if (wc.byte_len < 0x14) {
1872                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1873                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1874                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1875                         tevent_req_nterror(req, status);
1876                         return;
1877                 }
1878                 if (SVAL(io->data, 0x00) != 0x0100) {
1879                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1880                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1881                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1882                         tevent_req_nterror(req, status);
1883                         return;
1884                 }
1885                 if (SVAL(io->data, 0x02) != 0x0100) {
1886                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1887                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1888                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1889                         tevent_req_nterror(req, status);
1890                         return;
1891                 }
1892
1893                 credits_requested = SVAL(io->data, 0x06);
1894                 if (credits_requested == 0) {
1895                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1896                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1897                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1898                         tevent_req_nterror(req, status);
1899                         return;
1900                 }
1901
1902                 preferred_send_size = IVAL(io->data, 0x08);
1903                 if (preferred_send_size > c->state.max_receive_size) {
1904                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1905                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1906                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1907                         tevent_req_nterror(req, status);
1908                         return;
1909                 }
1910
1911                 max_receive_size = IVAL(io->data, 0x0c);
1912                 if (max_receive_size < 0x80) {
1913                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1914                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1915                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1916                         tevent_req_nterror(req, status);
1917                         return;
1918                 }
1919
1920                 max_fragmented_size = IVAL(io->data, 0x10);
1921                 if (max_fragmented_size < 0x20000) {
1922                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1923                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1924                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1925                         tevent_req_nterror(req, status);
1926                         return;
1927                 }
1928
1929                 c->state.receive_credit_target = credits_requested;
1930
1931                 tmp = c->state.max_receive_size;
1932                 tmp = MIN(tmp, preferred_send_size);
1933                 tmp = MAX(tmp, 128);
1934                 c->state.max_receive_size = tmp;
1935
1936                 tmp = c->state.max_send_size;
1937                 tmp = MIN(tmp, max_receive_size);
1938                 c->state.max_send_size = tmp;
1939
1940                 TALLOC_FREE(c->ibv.fde_channel);
1941                 TALLOC_FREE(c->rdma.fde_channel);
1942
1943                 DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
1944                         __location__, __FUNCTION__, ret, errno));
1945
1946                 TALLOC_FREE(io);
1947
1948                 errno = 0;
1949                 ret = smb_direct_connection_post_io(c);
1950                 if (ret != 0) {
1951                         status = map_nt_error_from_unix_common(errno);
1952                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1953                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1954                         tevent_req_nterror(req, status);
1955                         return;
1956                 }
1957
1958                 neg_send = c->s2r.idle;
1959                 DLIST_REMOVE(c->s2r.idle, neg_send);
1960
1961                 credits_granted = c->state.receive_posted;
1962                 credits_granted -= c->state.receive_credits;
1963                 credits_granted = MIN(credits_granted, c->state.receive_credit_target);
1964                 c->state.receive_credits += credits_granted;
1965
1966                 SSVAL(neg_send->data, 0x00, 0x0100);
1967                 SSVAL(neg_send->data, 0x02, 0x0100);
1968                 SSVAL(neg_send->data, 0x04, 0x0100);
1969                 SSVAL(neg_send->data, 0x06, 0x0000);
1970                 SSVAL(neg_send->data, 0x08, c->state.send_credit_target);
1971                 SSVAL(neg_send->data, 0x0a, credits_granted);
1972                 SIVAL(neg_send->data, 0x0c, NT_STATUS_V(NT_STATUS_OK));
1973                 SIVAL(neg_send->data, 0x10, c->state.max_read_write_size);
1974                 SIVAL(neg_send->data, 0x14, c->state.max_send_size);
1975                 SIVAL(neg_send->data, 0x18, c->state.max_receive_size);
1976                 SIVAL(neg_send->data, 0x1c, c->state.max_fragmented_size);
1977
1978                 neg_send->sge[1].length = 0x20;
1979                 neg_send->send_wr.sg_list = &neg_send->sge[1];
1980                 neg_send->send_wr.num_sge = 1;
1981
1982                 smb_direct_connection_debug_credits(c, "NEGOTIATE_ACCEPT", NULL, __location__, __FUNCTION__);
1983
1984                 ret = ibv_post_send(c->ibv.qp, &neg_send->send_wr, &bad_send_wr);
1985                 if (ret != 0) {
1986                         status = map_nt_error_from_unix_common(errno);
1987                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1988                                  __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
1989                         tevent_req_nterror(req, status);
1990                         return;
1991                 }
1992                 tevent_req_done(req);
1993                 return;
1994
1995         case IBV_WC_RDMA_WRITE:
1996         default:
1997                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
1998                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
1999                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2000                 tevent_req_nterror(req, status);
2001                 return;
2002         }
2003 }
2004
2005 static NTSTATUS smb_direct_connection_negotiate_accept_recv(
2006         struct tevent_req *req,
2007         TALLOC_CTX *mem_ctx,
2008         struct smb_direct_connection **_c)
2009 {
2010         struct smb_direct_connection_negotiate_accept_state *state =
2011                 tevent_req_data(req,
2012                 struct smb_direct_connection_negotiate_accept_state);
2013         NTSTATUS status;
2014
2015         DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2016
2017         *_c = NULL;
2018
2019         if (tevent_req_is_nterror(req, &status)) {
2020 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2021                 tevent_req_received(req);
2022                 return status;
2023         }
2024
2025         TALLOC_FREE(state->c->ibv.fde_channel);
2026         TALLOC_FREE(state->c->rdma.fde_channel);
2027         talloc_set_destructor(state, NULL);
2028
2029         *_c = talloc_move(mem_ctx, &state->c);
2030
2031         tevent_req_received(req);
2032         return NT_STATUS_OK;
2033 }
2034
2035 static void smb_direct_connection_disconnect(struct smb_direct_connection *c,
2036                                                  NTSTATUS status)
2037 {
2038         if (NT_STATUS_IS_OK(status)) {
2039                 status = NT_STATUS_UNEXPECTED_NETWORK_ERROR;
2040         }
2041
2042         smb_direct_connection_destructor(c);
2043 }
2044
2045 static void smb_direct_connection_rdma_handler(struct tevent_context *ev,
2046                                                struct tevent_fd *fde,
2047                                                uint16_t flags,
2048                                                void *private_data)
2049 {
2050         struct smb_direct_connection *c =
2051                 talloc_get_type_abort(private_data,
2052                 struct smb_direct_connection);
2053         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
2054         int ret;
2055
2056         errno = 0;
2057
2058         ret = rdma_get_cm_event(c->rdma.cm_channel,
2059                                 &c->rdma.cm_event);
2060         if (ret != 0) {
2061                 status = map_nt_error_from_unix_common(errno);
2062                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2063                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2064                 smb_direct_connection_disconnect(c, status);
2065                 return;
2066         }
2067
2068         if (c->rdma.cm_event->status != 0) {
2069                 errno = c->rdma.cm_event->status;
2070                 status = map_nt_error_from_unix_common(errno);
2071                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2072                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2073                 smb_direct_connection_disconnect(c, status);
2074                 return;
2075         }
2076
2077         switch (c->rdma.cm_event->event) {
2078         case RDMA_CM_EVENT_DISCONNECTED:
2079                 status = NT_STATUS_CONNECTION_DISCONNECTED;
2080                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2081                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2082                 smb_direct_connection_disconnect(c, status);
2083                 return;
2084         case RDMA_CM_EVENT_ADDR_RESOLVED:
2085         case RDMA_CM_EVENT_ADDR_ERROR:
2086         case RDMA_CM_EVENT_ROUTE_RESOLVED:
2087         case RDMA_CM_EVENT_ESTABLISHED:
2088         case RDMA_CM_EVENT_ROUTE_ERROR:
2089         case RDMA_CM_EVENT_CONNECT_REQUEST:
2090         case RDMA_CM_EVENT_CONNECT_RESPONSE:
2091         case RDMA_CM_EVENT_CONNECT_ERROR:
2092         case RDMA_CM_EVENT_UNREACHABLE:
2093         case RDMA_CM_EVENT_REJECTED:
2094         case RDMA_CM_EVENT_DEVICE_REMOVAL:
2095         case RDMA_CM_EVENT_MULTICAST_JOIN:
2096         case RDMA_CM_EVENT_MULTICAST_ERROR:
2097         case RDMA_CM_EVENT_ADDR_CHANGE:
2098         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2099                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2100                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2101                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2102                 smb_direct_connection_disconnect(c, status);
2103                 return;
2104         }
2105
2106         status = NT_STATUS_INTERNAL_ERROR;
2107         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2108                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2109         smb_direct_connection_disconnect(c, status);
2110 }
2111
2112 static void smb_direct_connection_ibv_handler(struct tevent_context *ev,
2113                                               struct tevent_fd *fde,
2114                                               uint16_t fde_flags,
2115                                               void *private_data)
2116 {
2117         struct smb_direct_connection *c =
2118                 talloc_get_type_abort(private_data,
2119                 struct smb_direct_connection);
2120         struct ibv_cq *cq = NULL;
2121         void *cq_context = NULL;
2122         NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
2123         struct ibv_wc wc;
2124         int ret;
2125         uint16_t credits_requested;
2126         uint16_t credits_granted;
2127         uint32_t send_credits;
2128         uint16_t flags;
2129         uint32_t data_offset;
2130         struct smb_direct_io *io = NULL;
2131         int try = 1;
2132         errno = 0;
2133         ret = ibv_get_cq_event(c->ibv.comp_channel,
2134                                &cq, &cq_context);
2135         if (ret != 0) {
2136                 status = map_nt_error_from_unix_common(errno);
2137                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2138                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2139                 smb_direct_connection_disconnect(c, status);
2140                 return;
2141         }
2142
2143         ibv_ack_cq_events(cq, 1);
2144
2145         if (cq_context != c) {
2146                 status = NT_STATUS_INTERNAL_ERROR;
2147                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2148                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2149                 smb_direct_connection_disconnect(c, status);
2150                 return;
2151         }
2152
2153         errno = 0;
2154         ret = ibv_req_notify_cq(cq, 0);
2155         if (ret != 0) {
2156                 status = map_nt_error_from_unix_common(errno);
2157                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2158                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2159                 smb_direct_connection_disconnect(c, status);
2160                 return;
2161         }
2162
2163         errno = 0;
2164 try_again:
2165         DEBUG(0,("%s:%s: try[%d] ret[%d] errno[%d] status[%s]\n",
2166                 __location__, __FUNCTION__, try, ret, errno, nt_errstr(status)));
2167         try++;
2168         ZERO_STRUCT(wc);
2169         ret = ibv_poll_cq(cq, 1, &wc);
2170         if (ret == 0 && try > 1) {
2171                 /*
2172                  * The queue is empty...
2173                  */
2174                 return;
2175         }
2176         if (ret != 1) {
2177                 status = map_nt_error_from_unix_common(errno);
2178                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2179                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2180                 smb_direct_connection_disconnect(c, status);
2181                 return;
2182         }
2183         ret = 0;
2184
2185         if (wc.status == IBV_WC_WR_FLUSH_ERR) {
2186                 errno = wc.status;
2187                 status = map_nt_error_from_unix_common(errno);
2188                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
2189                         __location__, __FUNCTION__, ret, errno, nt_errstr(status),
2190                         ibv_wc_status_str(wc.status)));
2191                 TALLOC_FREE(c->ibv.fde_channel);
2192                 TALLOC_FREE(c->rdma.fde_channel);
2193                 smb_direct_connection_rdma_handler(ev, fde, 0 /* flags */, private_data);
2194                 return;
2195         }
2196         if (wc.status != IBV_WC_SUCCESS) {
2197                 errno = wc.status;
2198                 status = map_nt_error_from_unix_common(errno);
2199                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
2200                         __location__, __FUNCTION__, ret, errno, nt_errstr(status),
2201                         ibv_wc_status_str(wc.status)));
2202                 smb_direct_connection_disconnect(c, status);
2203                 return;
2204         }
2205
2206         io = talloc_get_type_abort((void *)(uintptr_t)wc.wr_id,
2207                                    struct smb_direct_io);
2208
2209         switch (wc.opcode) {
2210         case IBV_WC_SEND:
2211                 DEBUG(0,("%s:%s: GOT SEND[%p] data_length[%u] remaining_length[%u] ret[%d] errno[%d]\n",
2212                         __location__, __FUNCTION__, io,
2213                         (unsigned)io->data_length, (unsigned)io->remaining_length, ret, errno));
2214                 DLIST_REMOVE(c->s2r.posted, io);
2215                 DLIST_ADD_END(c->s2r.idle, io);
2216
2217                 errno = 0;
2218                 ret = smb_direct_connection_setup_readv(c);
2219                 if (ret != 0) {
2220                         status = map_nt_error_from_unix_common(errno);
2221                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2222                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2223                         smb_direct_connection_disconnect(c, status);
2224                         return;
2225                 }
2226                 goto try_again;
2227                 return;
2228
2229         case IBV_WC_RECV:
2230                 DEBUG(0,("%s:%s: GOT RECV[%p] ret[%d] errno[%d]\n",
2231                         __location__, __FUNCTION__, io, ret, errno));
2232                 if (wc.byte_len > c->state.max_receive_size) {
2233                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2234                         DEBUG(0,("%s:%s: wc.byte_len[%u/0x%x] max_receive_size[%u/0x%x] ret[%d] errno[%d] status[%s]\n",
2235                                 __location__, __FUNCTION__,
2236                                 (unsigned)wc.byte_len,
2237                                 (unsigned)wc.byte_len,
2238                                 (unsigned)c->state.max_receive_size,
2239                                 (unsigned)c->state.max_receive_size,
2240                                 ret, errno, nt_errstr(status)));
2241                         smb_direct_connection_disconnect(c, status);
2242                         return;
2243                 }
2244                 if (wc.byte_len < SMB_DIRECT_DATA_MIN_HDR_SIZE) {
2245                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2246                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2247                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2248                         smb_direct_connection_disconnect(c, status);
2249                         return;
2250                 }
2251                 DLIST_REMOVE(c->r2s.posted, io);
2252                 //dump_data(0, io->smbd_hdr, MIN(wc.byte_len, sizeof(io->smbd_hdr)));
2253                 credits_requested = SVAL(io->smbd_hdr, 0x00);
2254                 if (credits_requested == 0) {
2255                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2256                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2257                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2258                         smb_direct_connection_disconnect(c, status);
2259                         return;
2260                 }
2261                 credits_granted = SVAL(io->smbd_hdr, 0x02);
2262                 flags = SVAL(io->smbd_hdr, 0x04);
2263                 io->remaining_length = IVAL(io->smbd_hdr, 0x08);
2264                 data_offset = IVAL(io->smbd_hdr, 0x0C);
2265                 io->data_length = IVAL(io->smbd_hdr, 0x10);
2266
2267                 c->state.receive_posted -= 1;
2268                 c->state.receive_credits -= 1;
2269                 c->state.receive_credit_target = credits_requested;
2270
2271                 send_credits = c->state.send_credits + credits_granted;
2272                 if (send_credits > c->state.send_credit_target) {
2273 smb_direct_connection_debug_credits(c, "wrong RECV",io, __location__, __func__);
2274                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2275                         DEBUG(0,("%s:%s: credits_granted[%u] send_credits[%u] ret[%d] errno[%d] status[%s]\n",
2276                                 __location__, __FUNCTION__,
2277                                 (unsigned)credits_granted, (unsigned)send_credits,
2278                                 ret, errno, nt_errstr(status)));
2279                         smb_direct_connection_disconnect(c, status);
2280                         return;
2281                 }
2282                 c->state.send_credits = send_credits;
2283
2284                 smb_direct_connection_debug_credits(c, "GOT_RECV", io, __location__, __FUNCTION__);
2285
2286                 if (data_offset == 0) {
2287                         if (wc.byte_len != SMB_DIRECT_DATA_MIN_HDR_SIZE) {
2288                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2289                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2290                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2291                                 smb_direct_connection_disconnect(c, status);
2292                                 return;
2293                         }
2294                         DLIST_ADD_END(c->r2s.idle, io);
2295                         errno = 0;
2296                         ret = smb_direct_connection_post_io(c);
2297                         if (ret != 0) {
2298                                 status = map_nt_error_from_unix_common(errno);
2299                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2300                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2301                                 smb_direct_connection_disconnect(c, status);
2302                                 return;
2303                         }
2304                         goto try_again;
2305                         //return;
2306                 }
2307
2308                 if (data_offset != SMB_DIRECT_DATA_OFFSET) {
2309                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2310                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2311                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2312                         smb_direct_connection_disconnect(c, status);
2313                         return;
2314                 }
2315
2316                 if (io->data_length > (c->state.max_receive_size - data_offset)) {
2317                         status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2318                         DEBUG(0,("%s:%s: io->data_length[%u/0x%x] max_receive_size-data_offset[%u/0x%x] ret[%d] errno[%d] status[%s]\n",
2319                                 __location__, __FUNCTION__,
2320                                 (unsigned)io->data_length,
2321                                 (unsigned)io->data_length,
2322                                 (unsigned)c->state.max_receive_size - data_offset,
2323                                 (unsigned)c->state.max_receive_size - data_offset,
2324                                 ret, errno, nt_errstr(status)));
2325                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2326                                  __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2327                         smb_direct_connection_disconnect(c, status);
2328                         return;
2329                 }
2330
2331                 if (c->r2s.remaining_length > 0) {
2332                         if (io->data_length > c->r2s.remaining_length) {
2333                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2334                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2335                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2336                                 smb_direct_connection_disconnect(c, status);
2337                                 return;
2338                         }
2339
2340                         c->r2s.remaining_length -= io->data_length;
2341
2342                         // Like this???
2343                         if (io->remaining_length > c->r2s.remaining_length) {
2344                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2345                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2346                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2347                                 smb_direct_connection_disconnect(c, status);
2348                                 return;
2349                         }
2350
2351                         io->iov = io->_iov_array;
2352                         io->iov[0].iov_base = io->data;
2353                         io->iov[0].iov_len = io->data_length;
2354                         io->iov_count = 1;
2355                         DEBUG(0,("%s:%s: CONTINUE[%p] io->data_length[%u] io->remaining_length[%u]\n",
2356                                 __location__, __func__, io,
2357                                 (unsigned)io->data_length,
2358                                 (unsigned)io->remaining_length));
2359                 } else {
2360                         uint64_t total_length = io->data_length + io->remaining_length;
2361
2362                         if (total_length >= c->state.max_fragmented_size) { //correct direction
2363                                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2364                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2365                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2366                                 smb_direct_connection_disconnect(c, status);
2367                                 return;
2368                         }
2369
2370                         _smb_setlen_tcp(io->nbt_hdr, total_length);
2371                         io->iov = io->_iov_array;
2372                         io->iov[0].iov_base = io->nbt_hdr;
2373                         io->iov[0].iov_len = sizeof(io->nbt_hdr);
2374                         io->iov[1].iov_base = io->data;
2375                         io->iov[1].iov_len = io->data_length;
2376                         io->iov_count = 2;
2377                         DEBUG(0,("%s:%s: START[%p] total_length[%u] io->data_length[%u] io->remaining_length[%u]\n",
2378                                 __location__, __func__, io, (unsigned)total_length,
2379                                 (unsigned)io->data_length,
2380                                 (unsigned)io->remaining_length));
2381
2382                         c->r2s.remaining_length = io->remaining_length;;
2383                 }
2384
2385                 if (flags & ~SMB_DIRECT_RESPONSE_REQUESTED) {
2386                         status = map_nt_error_from_unix_common(errno);
2387                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2388                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2389                         smb_direct_connection_disconnect(c, status);
2390                         return;
2391                 }
2392
2393                 if (flags & SMB_DIRECT_RESPONSE_REQUESTED) {
2394                         c->state.send_immediate = true;
2395                         ret = smb_direct_connection_post_io(c);
2396                         if (ret != 0) {
2397                                 status = map_nt_error_from_unix_common(errno);
2398                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2399                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2400                                 smb_direct_connection_disconnect(c, status);
2401                                 return;
2402                         }
2403                 }
2404
2405                 if (c->state.receive_credits == 0) {
2406                         ret = smb_direct_connection_post_io(c);
2407                         if (ret != 0) {
2408                                 status = map_nt_error_from_unix_common(errno);
2409                                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2410                                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2411                                 smb_direct_connection_disconnect(c, status);
2412                                 return;
2413                         }
2414                 }
2415
2416                 DLIST_ADD_END(c->r2s.ready, io);
2417                 errno = 0;
2418                 ret = smb_direct_connection_setup_writev(c);
2419                 if (ret != 0) {
2420                         status = map_nt_error_from_unix_common(errno);
2421                         DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2422                                 __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2423                         smb_direct_connection_disconnect(c, status);
2424                         return;
2425                 }
2426                 goto try_again;
2427                 return;
2428
2429         case IBV_WC_RDMA_READ:
2430         case IBV_WC_RDMA_WRITE:
2431         default:
2432                 status = NT_STATUS_INVALID_NETWORK_RESPONSE;
2433                 DEBUG(0,("%s:%s: GOT OPCODE[%u] IO[%p] ret[%d] errno[%d] status[%s]\n",
2434                         __location__, __FUNCTION__, wc.opcode, io, ret, errno, nt_errstr(status)));
2435                 smb_direct_connection_disconnect(c, status);
2436                 return;
2437         }
2438 }
2439
2440 static bool smb_direct_connection_sock_handler_writable(
2441         struct smb_direct_connection *c)
2442 {
2443         struct smb_direct_io *io = NULL;
2444         ssize_t sret;
2445         bool ok;
2446
2447 more:
2448         if (c->r2s.out != NULL) {
2449                 io = c->r2s.out;
2450                                 DEBUG(0,("%s:%s: CONTINUE[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2451                                          __location__, __func__, io,
2452                                          (unsigned)io->data_length,
2453                                          (unsigned)io->remaining_length,
2454                                          (unsigned)io->iov_count,
2455                                          (unsigned)io->iov[0].iov_len));
2456         } else {
2457                 io = c->r2s.ready;
2458                 if (io != NULL) {
2459                         DLIST_REMOVE(c->r2s.ready, io);
2460                         c->r2s.out = io;
2461                 }
2462         }
2463
2464         if (io == NULL) {
2465                 DBG_DEBUG("TEVENT_FD_NOT_READABLE\n");
2466                 TEVENT_FD_NOT_WRITEABLE(c->sock.fde);
2467                 return true;
2468         }
2469
2470                                 DEBUG(0,("%s:%s: WRITEV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2471                                          __location__, __func__, io,
2472                                          (unsigned)io->data_length,
2473                                          (unsigned)io->remaining_length,
2474                                          (unsigned)io->iov_count,
2475                                          (unsigned)io->iov[0].iov_len));
2476         sret = writev(c->sock.fd, io->iov, io->iov_count);
2477         if (sret == -1) {
2478                 if (errno == EAGAIN) {
2479                                 DEBUG(0,("%s:%s: EAGAIN WRITEV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2480                                          __location__, __func__, io,
2481                                          (unsigned)io->data_length,
2482                                          (unsigned)io->remaining_length,
2483                                          (unsigned)io->iov_count,
2484                                          (unsigned)io->iov[0].iov_len));
2485                         DBG_DEBUG("readv returned EAGAIN\n");
2486                         TEVENT_FD_WRITEABLE(c->sock.fde);
2487                         return true;
2488                 }
2489
2490                 DBG_ERR("writev failed ret [%zd] [%s]\n",
2491                         sret, strerror(errno));
2492                 return false;
2493         }
2494
2495         ok = iov_advance(&io->iov, &io->iov_count, sret);
2496         if (!ok) {
2497                 DBG_ERR("iov_advance failed [%s]\n", strerror(errno));
2498                 return false;
2499         }
2500
2501         if (io->iov_count == 0) {
2502                                 DEBUG(0,("%s:%s: done WRITEV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2503                                          __location__, __func__, io,
2504                                          (unsigned)io->data_length,
2505                                          (unsigned)io->remaining_length,
2506                                          (unsigned)io->iov_count,
2507                                          (unsigned)io->iov[0].iov_len));
2508                 c->r2s.out = NULL;
2509                 DLIST_ADD_END(c->r2s.idle, io);
2510                 goto more;
2511         }
2512
2513                                 DEBUG(0,("%s:%s: remain WRITEV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2514                                          __location__, __func__, io,
2515                                          (unsigned)io->data_length,
2516                                          (unsigned)io->remaining_length,
2517                                          (unsigned)io->iov_count,
2518                                          (unsigned)io->iov[0].iov_len));
2519         return true;
2520 }
2521
2522 static bool smb_direct_connection_sock_handler_readable(
2523         struct smb_direct_connection *c)
2524 {
2525         struct smb_direct_io *io = NULL;
2526         ssize_t sret;
2527         bool ok;
2528
2529 next_read:
2530         if (c->s2r.in != NULL) {
2531                 io = c->s2r.in;
2532         } else {
2533                 io = c->s2r.idle;
2534                 if (io != NULL) {
2535                         DLIST_REMOVE(c->s2r.idle, io);
2536                         c->s2r.in = io;
2537
2538                         if (c->s2r.remaining_length > 0) {
2539                                 /*
2540                                  * We need to continue to get
2541                                  * the incomplete packet.
2542                                  */
2543                                 io->data_length = MIN(c->state.max_send_size - SMB_DIRECT_DATA_OFFSET,
2544                                                       c->s2r.remaining_length);
2545                                 io->remaining_length = c->s2r.remaining_length;
2546                                 io->remaining_length -= io->data_length;
2547                                 c->s2r.remaining_length = io->remaining_length;
2548
2549                                 io->iov = io->_iov_array;
2550                                 io->iov[0].iov_base = io->data;
2551                                 io->iov[0].iov_len = io->data_length;
2552                                 io->iov_count = 1;
2553                                 DEBUG(0,("%s:%s: CONTINUE[%p] io->data_length[%u] io->remaining_length[%u]\n",
2554                                          __location__, __func__, io,
2555                                          (unsigned)io->data_length,
2556                                          (unsigned)io->remaining_length));
2557                         } else {
2558                                 DEBUG(0,("%s:%s: WAIT[%p] for NBT\n", __location__, __func__, io));
2559                                 /*
2560                                  * For a new packet we need to get the length
2561                                  * first.
2562                                  */
2563                                 io->data_length = 0;
2564                                 io->remaining_length = 0;
2565
2566                                 io->iov = io->_iov_array;
2567                                 io->iov[0].iov_base = io->nbt_hdr;
2568                                 io->iov[0].iov_len = sizeof(io->nbt_hdr);
2569                                 io->iov_count = 1;
2570                         }
2571                 }
2572         }
2573
2574         if (io == NULL) {
2575                 DBG_DEBUG("TEVENT_FD_NOT_READABLE\n");
2576                 TEVENT_FD_NOT_READABLE(c->sock.fde);
2577                 return true;
2578         }
2579
2580
2581         sret = readv(c->sock.fd, io->iov, io->iov_count);
2582         if (sret == -1) {
2583                 if (errno == EAGAIN) {
2584                                 DEBUG(0,("%s:%s: EAGAIN READV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2585                                          __location__, __func__, io,
2586                                          (unsigned)io->data_length,
2587                                          (unsigned)io->remaining_length,
2588                                          (unsigned)io->iov_count,
2589                                          (unsigned)io->iov[0].iov_len));
2590                         DBG_DEBUG("readv returned EAGAIN\n");
2591                         TEVENT_FD_READABLE(c->sock.fde);
2592                         return true;
2593                 }
2594                 return false;
2595         }
2596
2597         if (sret == 0) {
2598                 errno = EPIPE;
2599                 return false;
2600         }
2601
2602         ok = iov_advance(&io->iov, &io->iov_count, sret);
2603         if (!ok) {
2604                 errno = EPIPE;
2605                 return false;
2606         }
2607
2608         if (io->iov_count != 0) {
2609                 /* Wait for more data */
2610                                 DEBUG(0,("%s:%s: more READV[%p] io->data_length[%u] io->remaining_length[%u] io->iov_count[%u] io->iov[0].iov_len[%u]\n",
2611                                          __location__, __func__, io,
2612                                          (unsigned)io->data_length,
2613                                          (unsigned)io->remaining_length,
2614                                          (unsigned)io->iov_count,
2615                                          (unsigned)io->iov[0].iov_len));
2616                         TEVENT_FD_READABLE(c->sock.fde);
2617                 return true;
2618         }
2619
2620         if (io->data_length != 0) {
2621                 DEBUG(0,("%s:%s: FINISH[%p] io->data_length[%u] io->remaining_length[%u]\n",
2622                          __location__, __func__, io, (unsigned)io->data_length, (unsigned)io->remaining_length));
2623                 /*
2624                  * We managed to read the whole fragment which
2625                  * is ready to be posted into the send queue.
2626                  */
2627                 c->s2r.in = NULL;
2628                 DLIST_ADD_END(c->s2r.ready, io);
2629                 goto next_read;
2630         }
2631
2632         c->s2r.remaining_length = smb_len_tcp(io->nbt_hdr);
2633         if (c->s2r.remaining_length > c->state.max_fragmented_size) {
2634                 errno = EINVAL;
2635                 return false;
2636         }
2637
2638         io->data_length = MIN(c->state.max_send_size - SMB_DIRECT_DATA_OFFSET,
2639                               c->s2r.remaining_length);
2640         io->remaining_length = c->s2r.remaining_length;
2641         io->remaining_length -= io->data_length;
2642
2643         io->iov = io->_iov_array;
2644         io->iov[0].iov_base = io->data;
2645         io->iov[0].iov_len = io->data_length;
2646         io->iov_count = 1;
2647
2648         DEBUG(0,("%s:%s: AFTER[%p] total[%u] io->data_length[%u] io->remaining_length[%u]\n",
2649                  __location__, __func__, io, (unsigned)c->s2r.remaining_length,
2650                  (unsigned)io->data_length,
2651                  (unsigned)io->remaining_length));
2652         c->s2r.remaining_length = io->remaining_length;
2653         /*
2654          * try to read the rest immediately.
2655          */
2656         goto next_read;
2657 }
2658
2659 static void smb_direct_connection_sock_handler(struct tevent_context *ev,
2660                                                struct tevent_fd *fde,
2661                                                uint16_t fde_flags,
2662                                                void *private_data)
2663 {
2664         struct smb_direct_connection *c =
2665                 talloc_get_type_abort(private_data,
2666                 struct smb_direct_connection);
2667         NTSTATUS status;
2668         int ret;
2669         bool ok;
2670
2671         if (fde_flags & TEVENT_FD_WRITE) {
2672                 ok = smb_direct_connection_sock_handler_writable(c);
2673                 if (!ok) {
2674                         status = map_nt_error_from_unix_common(errno);
2675                         DBG_ERR("smb-d sock_handler_writable failed [%s]\n",
2676                                 nt_errstr(status));
2677                         smb_direct_connection_disconnect(c, status);
2678                         return;
2679                 }
2680         }
2681
2682         ret = smb_direct_connection_post_io(c);
2683         if (ret != 0) {
2684                 status = map_nt_error_from_unix_common(errno);
2685                 DBG_ERR("smb_direct_connection_post_io failed [%s]\n",
2686                         nt_errstr(status));
2687                 smb_direct_connection_disconnect(c, status);
2688                 return;
2689         }
2690
2691         if (fde_flags & TEVENT_FD_READ) {
2692                 ok = smb_direct_connection_sock_handler_readable(c);
2693                 if (!ok) {
2694                         status = map_nt_error_from_unix_common(errno);
2695                         DBG_ERR("smb-d sock_handler_readable failed [%s]\n",
2696                                 nt_errstr(status));
2697                         smb_direct_connection_disconnect(c, status);
2698                         return;
2699                 }
2700         }
2701
2702         ret = smb_direct_connection_post_io(c);
2703         if (ret != 0) {
2704                 status = map_nt_error_from_unix_common(errno);
2705                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2706                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2707                 smb_direct_connection_disconnect(c, status);
2708                 return;
2709         }
2710 }
2711
2712 NTSTATUS smb_direct_connection_setup_events(struct smb_direct_connection *c,
2713                                             struct tevent_context *ev)
2714 {
2715         uint16_t sock_fde_flags = TEVENT_FD_READ;
2716
2717 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2718         if (c->r2s.out != NULL) {
2719 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2720                 sock_fde_flags |= TEVENT_FD_WRITE;
2721         }
2722
2723         if (c->rdma.cm_channel == NULL) {
2724                 return NT_STATUS_CONNECTION_DISCONNECTED;
2725         }
2726
2727         if (tevent_fd_get_flags(c->ibv.fde_channel) == 0) {
2728                 c->last_ev = NULL;
2729                 TALLOC_FREE(c->sock.fde);
2730                 TALLOC_FREE(c->ibv.fde_channel);
2731                 TALLOC_FREE(c->rdma.fde_channel);
2732 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2733         }
2734
2735         if (tevent_fd_get_flags(c->rdma.fde_channel) == 0) {
2736                 c->last_ev = NULL;
2737                 TALLOC_FREE(c->sock.fde);
2738                 TALLOC_FREE(c->ibv.fde_channel);
2739                 TALLOC_FREE(c->rdma.fde_channel);
2740 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2741         }
2742
2743         if (ev == NULL) {
2744 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2745                 c->last_ev = NULL;
2746                 TALLOC_FREE(c->sock.fde);
2747                 TALLOC_FREE(c->ibv.fde_channel);
2748                 TALLOC_FREE(c->rdma.fde_channel);
2749                 return NT_STATUS_OK;
2750         }
2751
2752         if (ev == c->last_ev) {
2753 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2754                 return NT_STATUS_OK;
2755         }
2756
2757         if (c->last_ev != NULL) {
2758 //DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2759                 return NT_STATUS_INVALID_PARAMETER_MIX;
2760         }
2761
2762         c->rdma.fde_channel = tevent_add_fd(ev, c,
2763                                             c->rdma.cm_channel->fd,
2764                                             TEVENT_FD_READ,
2765                                             smb_direct_connection_rdma_handler,
2766                                             c);
2767         if (c->rdma.fde_channel == NULL) {
2768                 return NT_STATUS_NO_MEMORY;
2769         }
2770         c->ibv.fde_channel = tevent_add_fd(ev, c,
2771                                            c->ibv.comp_channel->fd,
2772                                            TEVENT_FD_READ,
2773                                            smb_direct_connection_ibv_handler,
2774                                            c);
2775         if (c->ibv.fde_channel == NULL) {
2776                 TALLOC_FREE(c->rdma.fde_channel);
2777                 return NT_STATUS_NO_MEMORY;
2778         }
2779         c->sock.fde = tevent_add_fd(ev, c, c->sock.fd,
2780                                     sock_fde_flags,
2781                                     smb_direct_connection_sock_handler,
2782                                     c);
2783         if (c->sock.fde == NULL) {
2784                 TALLOC_FREE(c->rdma.fde_channel);
2785                 TALLOC_FREE(c->ibv.fde_channel);
2786                 return NT_STATUS_NO_MEMORY;
2787         }
2788
2789 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2790         c->last_ev = ev;
2791         return NT_STATUS_OK;
2792 }
2793
2794 uint32_t smb_direct_connection_max_fragmented_size(struct smb_direct_connection *c)
2795 {
2796         return c->state.max_fragmented_size;
2797 }
2798
2799 uint32_t smb_direct_connection_max_read_write_size(struct smb_direct_connection *c)
2800 {
2801         return c->state.max_read_write_size;
2802 }
2803
2804 static int smb_direct_listener_destructor(struct smb_direct_listener *l);
2805
2806 NTSTATUS smbd_direct_listener_setup(TALLOC_CTX *mem_ctx,
2807                                     const struct sockaddr_storage *addr,
2808                                     struct smb_direct_listener **_l)
2809 {
2810         struct smb_direct_listener *l;
2811         int ret;
2812         struct sockaddr_storage _addr = *addr;
2813         struct sockaddr *bind_addr = (struct sockaddr *)&_addr;
2814         char addr_buf[INET6_ADDRSTRLEN] = { 0, };
2815
2816         set_sockaddr_port(bind_addr, 5445);
2817
2818         l = talloc_zero(mem_ctx, struct smb_direct_listener);
2819         if (l == NULL) {
2820         DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2821                 return NT_STATUS_NO_MEMORY;
2822         }
2823         talloc_set_destructor(l, smb_direct_listener_destructor);
2824
2825         l->rdma.cm_channel = rdma_create_event_channel();
2826         if (l->rdma.cm_channel == NULL) {
2827                 TALLOC_FREE(l);
2828 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2829                 return NT_STATUS_NO_MEMORY;
2830         }
2831         smb_set_close_on_exec(l->rdma.cm_channel->fd);
2832         set_blocking(l->rdma.cm_channel->fd, false);
2833
2834         l->rdma.context.l = l;
2835
2836 #if RDMA_USER_CM_MAX_ABI_VERSION >= 2
2837         ret = rdma_create_id(l->rdma.cm_channel,
2838                              &l->rdma.cm_id,
2839                              &l->rdma.context,
2840                              RDMA_PS_TCP);
2841 #else
2842 #error
2843         ret = rdma_create_id(l->rdma.cm_channel,
2844                              &l->rdma.cm_id,
2845                              &l->rdma.context);
2846 #endif
2847         if (ret != 0) {
2848                 NTSTATUS status;
2849                 status = map_nt_error_from_unix_common(errno);
2850                 TALLOC_FREE(l);
2851 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2852                 return status;
2853         }
2854
2855         l->rdma.cm_channel = l->rdma.cm_id->channel;
2856
2857         errno = 0;
2858         ret = rdma_bind_addr(l->rdma.cm_id, bind_addr);
2859         if (ret != 0) {
2860                 NTSTATUS status;
2861                 status = map_nt_error_from_unix_common(errno);
2862                 TALLOC_FREE(l);
2863 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2864                 return status;
2865         }
2866
2867         ret = rdma_listen(l->rdma.cm_id, SMB_DIRECT_LISTENER_BACKLOG);
2868         if (ret != 0) {
2869                 NTSTATUS status;
2870                 status = map_nt_error_from_unix_common(errno);
2871                 TALLOC_FREE(l);
2872 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
2873                 return status;
2874         }
2875         l->rdma.expected_event = RDMA_CM_EVENT_CONNECT_REQUEST;
2876
2877         DBG_ERR("SMB-D daemon started listening SMB-D connections on listener[%p]: %s\n",
2878                 l, print_sockaddr(addr_buf, sizeof(addr_buf), &_addr));
2879
2880         *_l = l;
2881         return NT_STATUS_OK;
2882 }
2883
2884 static int smb_direct_listener_destructor(struct smb_direct_listener *l)
2885 {
2886         TALLOC_FREE(l->rdma.fde_channel);
2887
2888         if (l->rdma.cm_event != NULL) {
2889                 rdma_ack_cm_event(l->rdma.cm_event);
2890                 l->rdma.cm_event = NULL;
2891         }
2892
2893         if (l->rdma.cm_id != NULL) {
2894                 rdma_destroy_id(l->rdma.cm_id);
2895                 l->rdma.cm_id = NULL;
2896         }
2897
2898         if (l->rdma.cm_channel != NULL) {
2899                 rdma_destroy_event_channel(l->rdma.cm_channel);
2900                 l->rdma.cm_channel = NULL;
2901         }
2902
2903         return 0;
2904 }
2905
/*
 * Private state of a request created by
 * smb_direct_listener_accept_send().
 */
struct smb_direct_listener_accept_state {
        /* event context passed to smb_direct_listener_accept_send() */
        struct tevent_context *ev;
        /* listener passed to smb_direct_listener_accept_send() */
        struct smb_direct_listener *l;
};
2910
2911 static int smb_direct_listener_accept_state_destructor(
2912                 struct smb_direct_listener_accept_state *state)
2913 {
2914         TALLOC_FREE(state->l->rdma.fde_channel);
2915
2916         return 0;
2917 }
2918
2919 static void smb_direct_listener_accept_rdma_handler(struct tevent_context *ev,
2920                                                     struct tevent_fd *fde,
2921                                                     uint16_t flags,
2922                                                     void *private_data);
2923
2924 struct tevent_req *smb_direct_listener_accept_send(TALLOC_CTX *mem_ctx,
2925                                                    struct tevent_context *ev,
2926                                                    struct smb_direct_listener *l)
2927 {
2928         struct tevent_req *req = NULL;
2929         struct smb_direct_listener_accept_state *state = NULL;
2930
2931         req = tevent_req_create(mem_ctx, &state,
2932                                 struct smb_direct_listener_accept_state);
2933         if (req == NULL) {
2934                 return NULL;
2935         }
2936         state->ev = ev;
2937         state->l = l;
2938         talloc_set_destructor(state, smb_direct_listener_accept_state_destructor);
2939
2940 // HACK: if smb_direct_listener_accept_rdma_handler is not triggered by fde
2941 //      smb_direct_listener_accept_rdma_handler(ev, NULL, 0, req);
2942
2943         l->rdma.fde_channel = tevent_add_fd(ev, state,
2944                                         l->rdma.cm_channel->fd,
2945                                         TEVENT_FD_READ,
2946                                         smb_direct_listener_accept_rdma_handler,
2947                                         req);
2948         if (tevent_req_nomem(l->rdma.fde_channel, req)) {
2949                 return tevent_req_post(req, ev);
2950         }
2951
2952         DBG_ERR("SMB-D daemon started accepting SMB-D connections on listener[%p]\n", l);
2953
2954         return req;
2955 }
2956
2957 static void smb_direct_listener_accept_done(struct tevent_req *subreq);
2958
2959 static void smb_direct_listener_accept_rdma_handler(struct tevent_context *ev,
2960                                                     struct tevent_fd *fde,
2961                                                     uint16_t flags,
2962                                                     void *private_data)
2963 {
2964         struct tevent_req *req =
2965                 talloc_get_type_abort(private_data,
2966                 struct tevent_req);
2967         struct smb_direct_listener_accept_state *state =
2968                 tevent_req_data(req,
2969                 struct smb_direct_listener_accept_state);
2970         struct smb_direct_listener *l = state->l;
2971         struct smb_direct_connection *c = NULL;
2972         struct tevent_req *subreq = NULL;
2973         NTSTATUS status;
2974         int ret;
2975
2976         DBG_ERR("SMB-D got connection event listener[%p]\n", l);
2977
2978         ret = rdma_get_cm_event(l->rdma.cm_channel,
2979                                 &l->rdma.cm_event);
2980         if (ret != 0) {
2981                 if (errno == EAGAIN) {
2982                         DBG_ERR("SMB-D got EAGAIN error event on listener [%p]\n", l);
2983                         return;
2984                 }
2985                 status = map_nt_error_from_unix_common(errno);
2986                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2987                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2988                 smb_direct_listener_destructor(l); //TODO cleanup???
2989                 tevent_req_nterror(req, status);
2990                 return;
2991         }
2992
2993         errno = 0;
2994         if (l->rdma.cm_event->status != 0) {
2995                 errno = l->rdma.cm_event->status;
2996                 status = map_nt_error_from_unix_common(errno);
2997                 DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
2998                         __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
2999                 smb_direct_listener_destructor(l); //TODO cleanup???
3000                 tevent_req_nterror(req, status);
3001                 return;
3002         }
3003
3004         if (l->rdma.cm_event->event != l->rdma.expected_event) {
3005                 DEBUG(0,("%s:%s: l->rdma.cm_event->event[%u] != l->rdma.expected_event[%u]\n",
3006                         __location__, __FUNCTION__,
3007                         l->rdma.cm_event->event, l->rdma.expected_event));
3008         }
3009
3010         switch (l->rdma.cm_event->event) {
3011         case RDMA_CM_EVENT_CONNECT_REQUEST:
3012
3013                 DBG_ERR("RDMA_CM_EVENT_CONNECT_REQUEST\n");
3014
3015                 c = smb_direct_connection_create(l);
3016                 if (c == NULL) {
3017                         DBG_ERR("smb_direct_connection_create failed - ignoring\n");
3018
3019                         rdma_reject(l->rdma.cm_event->id, NULL, 0);
3020                         /* wait for more */
3021                         break;
3022                 }
3023
3024                 RSIVAL(c->rdma.ird_ord_hdr, 0, 0);
3025                 RSIVAL(c->rdma.ird_ord_hdr, 4, 16);
3026
3027                 c->rdma.conn_param = l->rdma.cm_event->param.conn;
3028                 c->rdma.conn_param.private_data = c->rdma.ird_ord_hdr;
3029                 c->rdma.conn_param.private_data_len = sizeof(c->rdma.ird_ord_hdr);
3030
3031                 c->rdma.cm_id = l->rdma.cm_event->id;
3032                 c->rdma.cm_id->context = &c->rdma.context;
3033
3034                 ret = rdma_migrate_id(c->rdma.cm_id, c->rdma.cm_channel);
3035                 if (ret != 0) {
3036                         DBG_ERR("rdma_migrate_id failed [%s] result [%d]\n", strerror(errno), ret);
3037                         c->rdma.cm_id->context = NULL;
3038                         c->rdma.cm_id = NULL;
3039                         TALLOC_FREE(c);
3040
3041                         rdma_reject(l->rdma.cm_event->id, NULL, 0);
3042                         /* wait for more */
3043                         break;
3044                 }
3045
3046                 status = smb_direct_connection_complete_alloc(c);
3047                 if (!NT_STATUS_IS_OK(status)) {
3048                         DBG_ERR("smb_direct_connection_complete_alloc failed - ignoring\n");
3049                         c->rdma.cm_id->context = NULL;
3050                         c->rdma.cm_id->channel = NULL;
3051                         c->rdma.cm_id = NULL;
3052                         TALLOC_FREE(c);
3053
3054                         rdma_reject(l->rdma.cm_event->id, NULL, 0);
3055                         /* wait for more */
3056                         break;
3057                 }
3058
3059                 c->l = l;
3060                 DLIST_ADD_END(l->pending, c);
3061
3062                 subreq = smb_direct_connection_negotiate_accept_send(state,
3063                                                                      state->ev,
3064                                                                      &c);
3065                 if (subreq == NULL) {
3066                         DBG_ERR("smb_direct_connection_accept_send ENOMEM\n");
3067                         TALLOC_FREE(c);
3068                         /* wait for more */
3069                         break;
3070                 }
3071                 tevent_req_set_callback(subreq, smb_direct_listener_accept_done, req);
3072                 break;
3073
3074         case RDMA_CM_EVENT_DISCONNECTED:
3075                 DBG_DEBUG("RDMA_CM_EVENT_DISCONNECTED\n");
3076                 break;
3077
3078         case RDMA_CM_EVENT_DEVICE_REMOVAL:
3079                 DBG_ERR("RDMA device removal\n");
3080                 break;
3081
3082         default:
3083                 DBG_ERR("event %d\n", l->rdma.cm_event->event);
3084                 break;
3085         }
3086
3087         rdma_ack_cm_event(l->rdma.cm_event);
3088         l->rdma.cm_event = NULL;
3089
3090         return;
3091 }
3092
3093 static void smb_direct_listener_accept_done(struct tevent_req *subreq)
3094 {
3095         struct tevent_req *req = tevent_req_callback_data(
3096                 subreq, struct tevent_req);
3097         struct smb_direct_listener_accept_state *state =
3098                 tevent_req_data(req,
3099                 struct smb_direct_listener_accept_state);
3100         struct smb_direct_listener *l = state->l;
3101         struct smb_direct_connection *c = NULL;
3102         NTSTATUS status;
3103
3104         DEBUG(0,("%s:%s: here...\n", __location__, __func__));
3105
3106         status = smb_direct_connection_negotiate_accept_recv(subreq, state, &c);
3107         TALLOC_FREE(subreq);
3108         if (!NT_STATUS_IS_OK(status)) {
3109                 TALLOC_FREE(c);
3110                 return;
3111         }
3112
3113         if (l == NULL) {
3114                 TALLOC_FREE(c);
3115                 return;
3116         }
3117
3118         DLIST_REMOVE(l->pending, c);
3119         DLIST_ADD_END(l->ready, c);
3120
3121         talloc_reparent(state, l, c);
3122
3123         tevent_req_defer_callback(req, state->ev);
3124         tevent_req_notify_callback(req);
3125         return;
3126 }
3127
3128 NTSTATUS smb_direct_listener_accept_recv(struct tevent_req *req,
3129                                          TALLOC_CTX *mem_ctx,
3130                                          struct smb_direct_connection **_c,
3131                                          int *fd,
3132                                          struct sockaddr_storage *laddr,
3133                                          struct sockaddr_storage *raddr)
3134 {
3135         struct smb_direct_listener_accept_state *state =
3136                 tevent_req_data(req,
3137                 struct smb_direct_listener_accept_state);
3138         struct smb_direct_listener *l = state->l;
3139         struct smb_direct_connection *c = NULL;
3140         NTSTATUS status;
3141
3142 DEBUG(0,("%s:%s: here...\n", __location__, __func__));
3143         *_c = NULL;
3144         *fd = -1;
3145         if (laddr != NULL) {
3146                 ZERO_STRUCTP(laddr);
3147         }
3148         if (raddr != NULL) {
3149                 ZERO_STRUCTP(raddr);
3150         }
3151
3152         if (tevent_req_is_in_progress(req)) {
3153                 /*
3154                  * We don't call tevent_req_received()
3155                  * because the caller can leave this alive
3156                  * in order to consume more connections
3157                  */
3158                 if (l->ready == NULL) {
3159         DEBUG(0,("%s:%s: here...\n", __location__, __func__));
3160                         return NT_STATUS_NO_MORE_ENTRIES;
3161                 }
3162
3163                 c = l->ready;
3164                 DLIST_REMOVE(l->ready, c);
3165                 c->l = NULL;
3166
3167                 if (l->ready != NULL) {
3168                         tevent_req_defer_callback(req, state->ev);
3169                         tevent_req_notify_callback(req);
3170                 }
3171
3172                 *fd = c->sock.tmp_fd;
3173                 c->sock.tmp_fd = -1;
3174                 if (laddr != NULL) {
3175                         ZERO_STRUCTP(laddr);
3176                 }
3177                 if (raddr != NULL) {
3178                         ZERO_STRUCTP(raddr);
3179                 }
3180         DEBUG(0,("%s:%s: here...\n", __location__, __func__));
3181                 *_c = talloc_move(mem_ctx, &c);
3182                 return NT_STATUS_OK;
3183         }
3184
3185         if (tevent_req_is_nterror(req, &status)) {
3186                 DEBUG(0,("%s:%s: here...[%s] %s\n", __location__, __func__, nt_errstr(status),
3187                         tevent_req_print(req, req)));
3188                 tevent_req_received(req);
3189                 return status;
3190         }
3191
3192         /* should never be reached */
3193         DEBUG(0,("%s:%s: here...[%s] %s\n", __location__, __func__, nt_errstr(status),
3194                 tevent_req_print(req, req)));
3195         tevent_req_received(req);
3196         return NT_STATUS_INTERNAL_ERROR;
3197 }
3198
3199 #endif /* SMB_TRANSPORT_ENABLE_RDMA */