2 * Unix SMB/CIFS implementation.
3 * Wrap Infiniband calls.
5 * Copyright (C) Sven Oehme <oehmes@de.ibm.com> 2006
7 * Major code contributions by Peter Somogyi <psomogyi@gamax.hu>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 #include <sys/types.h>
29 #include <netinet/in.h>
30 #include <sys/socket.h>
32 #include <arpa/inet.h>
38 #include "lib/events/events.h"
39 #include "ibwrapper.h"
41 #include <rdma/rdma_cma.h>
43 #include "ibwrapper_internal.h"
44 #include "lib/util/dlinklist.h"
/* Capacity, in bytes, of the last-error text buffer below. */
46 #define IBW_LASTERR_BUFSIZE 512
/* File-wide buffer that the functions in this file sprintf() their error text into. */
47 static char ibw_lasterr[IBW_LASTERR_BUFSIZE];
/* Forward declarations: both functions are referenced before they are defined. */
49 static void ibw_event_handler_verbs(struct event_context *ev,
50 struct fd_event *fde, uint16_t flags, void *private_data);
51 static int ibw_fill_cq(struct ibw_conn *conn);
/*
 * Allocate an n-byte buffer aligned to pctx->pagesize and register it with
 * ibv_reg_mr() on pctx->pd, requesting IBV_ACCESS_LOCAL_WRITE. The memory
 * region handle is stored through ppmr.
 * pconn is not used by any of the visible lines.
 * NOTE(review): the conditions guarding the two error messages and the
 * return statements are not visible in this excerpt; callers in this file
 * treat a NULL result as failure.
 */
53 static void *ibw_alloc_mr(struct ibw_ctx_priv *pctx, struct ibw_conn_priv *pconn,
54 int n, struct ibv_mr **ppmr)
/* page-aligned allocation of the raw buffer */
57 buf = memalign(pctx->pagesize, n);
59 sprintf(ibw_lasterr, "couldn't allocate memory\n");
/* register the buffer with the protection domain */
63 *ppmr = ibv_reg_mr(pctx->pd, buf, n, IBV_ACCESS_LOCAL_WRITE);
65 sprintf(ibw_lasterr, "couldn't allocate mr\n");
/*
 * Takes the addresses of a buffer pointer and of its memory-region pointer.
 * Called from ibw_conn_priv_destruct() and from the send-completion path.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * deregisters the region and frees the buffer - confirm.
 */
73 static void ibw_free_mr(char **ppbuf, struct ibv_mr **ppmr)
/*
 * Set up the per-connection send and receive buffers and the pool of send
 * work-request descriptors.
 *
 * Registers one send region of max_send_wr * avg_send_size bytes and one
 * receive region of max_recv_wr * recv_bufsize bytes through ibw_alloc_mr().
 * It then builds wr_index[] with one struct ibw_wr per send slot and links
 * every descriptor into wr_list_avail.
 * NOTE(review): the return statements are not visible in this excerpt; the
 * caller (ibw_setup_cq_qp) treats a non-zero result as failure.
 */
85 static int ibw_init_memory(struct ibw_conn *conn)
87 struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
88 struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
/* one contiguous registered buffer shared by all send slots */
93 pconn->buf_send = ibw_alloc_mr(pctx, pconn,
94 pctx->opts.max_send_wr * pctx->opts.avg_send_size, &pconn->mr_send);
95 if (!pconn->buf_send) {
96 sprintf(ibw_lasterr, "couldn't allocate work send buf\n");
/* one contiguous registered buffer shared by all posted receives */
100 pconn->buf_recv = ibw_alloc_mr(pctx, pconn,
101 pctx->opts.max_recv_wr * pctx->opts.recv_bufsize, &pconn->mr_recv);
102 if (!pconn->buf_recv) {
103 sprintf(ibw_lasterr, "couldn't allocate work recv buf\n");
/* lookup table from send slot number to its descriptor; owned by pconn */
107 pconn->wr_index = talloc_size(pconn, pctx->opts.max_send_wr * sizeof(struct ibw_wr *));
108 assert(pconn->wr_index!=NULL);
110 for(i=0; i<pctx->opts.max_send_wr; i++) {
/* NOTE(review): the talloc_zero() result is dereferenced on the next line without a check */
111 p = pconn->wr_index[i] = talloc_zero(pconn, struct ibw_wr);
/* slot i uses the avg_send_size bytes of buf_send starting at i * avg_send_size */
112 p->msg = pconn->buf_send + (i * pctx->opts.avg_send_size);
115 DLIST_ADD(pconn->wr_list_avail, p);
/*
 * talloc destructor for struct ibw_ctx_priv (installed by ibw_init).
 * Releases the protection domain, the rdma_cm event channel, the fd event
 * watching that channel, and the context's own cm_id.
 * NOTE(review): in the visible order the event channel is destroyed before
 * both the fd event that watches its descriptor and the cm_id created on it.
 * The librdmacm documentation asks for ids to be destroyed before their
 * channel - confirm whether the order should be reversed.
 */
121 static int ibw_ctx_priv_destruct(struct ibw_ctx_priv *pctx)
/* release the protection domain */
124 ibv_dealloc_pd(pctx->pd);
129 if (pctx->cm_channel) {
130 rdma_destroy_event_channel(pctx->cm_channel);
131 pctx->cm_channel = NULL;
133 if (pctx->cm_channel_event) {
134 /* TODO: do we have to do this here? */
135 talloc_free(pctx->cm_channel_event);
136 pctx->cm_channel_event = NULL;
/* destroy the context's own rdma_cm id */
139 rdma_destroy_id(pctx->cm_id);
/*
 * talloc destructor for struct ibw_ctx (installed by ibw_init).
 * NOTE(review): the body is not visible in this excerpt.
 */
146 static int ibw_ctx_destruct(struct ibw_ctx *ctx)
/*
 * talloc destructor for struct ibw_conn_priv (installed by ibw_conn_new).
 * Releases, in this order: the send and receive memory regions, the queue
 * pair, the completion queue, the completion channel, the fd event watching
 * that channel, and finally the connection's cm_id.
 * NOTE(review): the guards around some of these calls are not visible in
 * this excerpt.
 */
151 static int ibw_conn_priv_destruct(struct ibw_conn_priv *pconn)
153 /* free memory regions */
154 ibw_free_mr(&pconn->buf_send, &pconn->mr_send);
155 ibw_free_mr(&pconn->buf_recv, &pconn->mr_recv);
157 /* pconn->wr_index is freed by talloc */
158 /* pconn->wr_index[i] are freed by talloc */
/* the queue pair hangs off the cm_id (it is created by rdma_create_qp) */
161 if (pconn->cm_id->qp) {
162 ibv_destroy_qp(pconn->cm_id->qp);
163 pconn->cm_id->qp = NULL;
166 ibv_destroy_cq(pconn->cq);
169 if (pconn->verbs_channel) {
170 ibv_destroy_comp_channel(pconn->verbs_channel);
171 pconn->verbs_channel = NULL;
173 if (pconn->verbs_channel_event) {
174 /* TODO: do we have to do this here? */
175 talloc_free(pconn->verbs_channel_event);
176 pconn->verbs_channel_event = NULL;
/* last: the connection's rdma_cm id */
179 rdma_destroy_id(pconn->cm_id);
/*
 * talloc destructor for struct ibw_conn (installed by ibw_conn_new).
 * Unlinks the connection from its context's conn_list.
 */
185 static int ibw_conn_destruct(struct ibw_conn *conn)
187 /* important here: ctx is a talloc _parent_ */
188 DLIST_REMOVE(conn->ctx->conn_list, conn);
/*
 * Create a zeroed connection object and its private part, install their
 * talloc destructors, and add the connection to ctx->conn_list.
 * NOTE(review): both allocations use ctx as the talloc parent, so freeing
 * the connection does not by itself free its private part - confirm that
 * this is intended.
 * NOTE(review): the lines that link conn to ctx and to pconn, the NULL
 * checks and the return statement are not visible in this excerpt.
 */
192 static struct ibw_conn *ibw_conn_new(struct ibw_ctx *ctx)
194 struct ibw_conn *conn;
195 struct ibw_conn_priv *pconn;
197 conn = talloc_zero(ctx, struct ibw_conn);
199 talloc_set_destructor(conn, ibw_conn_destruct);
201 pconn = talloc_zero(ctx, struct ibw_conn_priv);
203 talloc_set_destructor(pconn, ibw_conn_priv_destruct);
/* make the connection reachable from its context */
207 DLIST_ADD(ctx->conn_list, conn);
/*
 * Create the verbs resources of a connection: buffers (ibw_init_memory),
 * a completion channel with an fd event that calls ibw_event_handler_verbs,
 * one completion queue shared by sends and receives, and a reliable-
 * connected queue pair. Ends by posting the receive buffers through
 * ibw_fill_cq(), whose result is returned.
 * NOTE(review): the error-path statements following each sprintf() are not
 * visible in this excerpt.
 */
212 static int ibw_setup_cq_qp(struct ibw_conn *conn)
214 struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
215 struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
216 struct ibv_qp_init_attr init_attr;
220 if (ibw_init_memory(conn))
/* completion channel: its fd signals when completions are available */
224 pconn->verbs_channel = ibv_create_comp_channel(pconn->cm_id->verbs);
225 if (!pconn->verbs_channel) {
226 sprintf(ibw_lasterr, "ibv_create_comp_channel failed %d\n", errno);
229 DEBUG(10, ("created channel %p\n", pconn->verbs_channel));
/* the fd event is a talloc child of conn and passes conn to the handler */
231 pconn->verbs_channel_event = event_add_fd(pctx->ectx, conn,
232 pconn->verbs_channel->fd, EVENT_FD_READ, ibw_event_handler_verbs, conn);
/* a single CQ sized to hold every outstanding send and receive */
235 pconn->cq = ibv_create_cq(pconn->cm_id->verbs,
236 pctx->opts.max_recv_wr + pctx->opts.max_send_wr,
237 conn, pconn->verbs_channel, 0);
238 if (pconn->cq==NULL) {
239 sprintf(ibw_lasterr, "ibv_create_cq failed\n");
/* ask for a notification on the next completion (second argument 0: not solicited-only) */
243 rc = ibv_req_notify_cq(pconn->cq, 0);
245 sprintf(ibw_lasterr, "ibv_req_notify_cq failed with %d\n", rc);
/* queue pair: RC type, one scatter/gather element per request, shared CQ */
250 memset(&init_attr, 0, sizeof(init_attr));
251 init_attr.cap.max_send_wr = pctx->opts.max_send_wr;
252 init_attr.cap.max_recv_wr = pctx->opts.max_recv_wr;
253 init_attr.cap.max_recv_sge = 1;
254 init_attr.cap.max_send_sge = 1;
255 init_attr.qp_type = IBV_QPT_RC;
256 init_attr.send_cq = pconn->cq;
257 init_attr.recv_cq = pconn->cq;
259 rc = rdma_create_qp(pconn->cm_id, pctx->pd, &init_attr);
261 sprintf(ibw_lasterr, "rdma_create_qp failed with %d\n", rc);
264 /* else result is in pconn->cm_id->qp */
266 return ibw_fill_cq(conn);
/*
 * Post one receive work request for the slot at pconn->recv_index and
 * advance recv_index cyclically (modulo max_recv_wr).
 * The request's wr_id is max_send_wr + slot number, which is how the
 * completion handler tells receive ids from send ids.
 * NOTE(review): the remaining initialisers of `wr` and the return statements
 * are not visible in this excerpt.
 */
269 static int ibw_refill_cq_recv(struct ibw_conn *conn)
271 struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
272 struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
/* scatter/gather entry; the address is filled in below */
274 struct ibv_sge list = {
275 .addr = (uintptr_t) NULL,
276 .length = pctx->opts.recv_bufsize,
277 .lkey = pconn->mr_recv->lkey
279 struct ibv_recv_wr wr = {
284 struct ibv_recv_wr *bad_wr;
/* the slot's buffer starts recv_bufsize * recv_index bytes into buf_recv */
286 list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
287 wr.wr_id = pctx->opts.max_send_wr + pconn->recv_index;
288 pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;
290 rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
292 sprintf(ibw_lasterr, "ibv_post_recv failed with %d\n", rc);
293 DEBUG(0, (ibw_lasterr));
/*
 * Post max_recv_wr receive work requests, one per receive slot, using the
 * same addressing and wr_id scheme as ibw_refill_cq_recv().
 * NOTE(review): the remaining initialisers of `wr` and the return statements
 * are not visible in this excerpt.
 */
300 static int ibw_fill_cq(struct ibw_conn *conn)
302 struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
303 struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
/* scatter/gather entry; the address is set per iteration */
305 struct ibv_sge list = {
306 .addr = (uintptr_t) NULL,
307 .length = pctx->opts.recv_bufsize,
308 .lkey = pconn->mr_recv->lkey
310 struct ibv_recv_wr wr = {
315 struct ibv_recv_wr *bad_wr;
/* counts down from max_recv_wr; the slot used is recv_index, not i */
317 for(i = pctx->opts.max_recv_wr; i!=0; i--) {
318 list.addr = (uintptr_t) pconn->buf_recv + pctx->opts.recv_bufsize * pconn->recv_index;
319 wr.wr_id = pctx->opts.max_send_wr + pconn->recv_index;
320 pconn->recv_index = (pconn->recv_index + 1) % pctx->opts.max_recv_wr;
322 rc = ibv_post_recv(pconn->cm_id->qp, &wr, &bad_wr);
324 sprintf(ibw_lasterr, "ibv_post_recv failed with %d\n", rc);
325 DEBUG(0, (ibw_lasterr));
/*
 * Active-side connect step, called from the ROUTE_RESOLVED case of the CM
 * event handler: creates the connection's CQ/QP via ibw_setup_cq_qp() and
 * then issues rdma_connect() on cma_id.
 * NOTE(review): the checks on rc and the return statements are not visible
 * in this excerpt.
 */
333 static int ibw_manage_connect(struct ibw_conn *conn, struct rdma_cm_id *cma_id)
335 struct rdma_conn_param conn_param;
338 rc = ibw_setup_cq_qp(conn);
/* connection parameters: everything zero except the three fields below */
343 memset(&conn_param, 0, sizeof conn_param);
344 conn_param.responder_resources = 1;
345 conn_param.initiator_depth = 1;
346 conn_param.retry_count = 10;
348 rc = rdma_connect(cma_id, &conn_param);
350 sprintf(ibw_lasterr, "rdma_connect error %d\n", rc);
/*
 * fd-event callback for the rdma_cm event channel. ibw_init registers it
 * with the ibw_ctx as private_data.
 *
 * Fetches one CM event with rdma_get_cm_event(), dispatches on event->event
 * and acknowledges the event with rdma_ack_cm_event(). The trailing lines
 * log ibw_lasterr and report an error state to the upper layer through
 * connstate_func, for either a connection or the context.
 * NOTE(review): several statements are not visible in this excerpt,
 * including the assignment of cma_id, the ends of the cases and the jumps
 * to the error tail. The comments below describe only the lines shown.
 */
355 static void ibw_event_handler_cm(struct event_context *ev,
356 struct fd_event *fde, uint16_t flags, void *private_data)
359 struct ibw_ctx *ctx = talloc_get_type(private_data, struct ibw_ctx);
360 struct ibw_ctx_priv *pctx = talloc_get_type(ctx->internal, struct ibw_ctx_priv);
361 struct ibw_conn *conn = NULL;
362 struct ibw_conn_priv *pconn = NULL;
363 struct rdma_cm_id *cma_id = NULL;
364 struct rdma_cm_event *event = NULL;
368 rc = rdma_get_cm_event(pctx->cm_channel, &event);
370 ctx->state = IBWS_ERROR;
371 sprintf(ibw_lasterr, "rdma_get_cm_event error %d\n", rc);
/* "parent" means the event belongs to the context's own cm_id */
376 DEBUG(10, ("cma_event type %d cma_id %p (%s)\n", event->event, cma_id,
377 (cma_id == pctx->cm_id) ? "parent" : "child"));
379 switch (event->event) {
380 case RDMA_CM_EVENT_ADDR_RESOLVED:
381 /* continuing from ibw_connect ... */
/* second argument is the timeout passed to rdma_resolve_route() */
382 rc = rdma_resolve_route(cma_id, 2000);
384 sprintf(ibw_lasterr, "rdma_resolve_route error %d\n", rc);
387 /* continued at RDMA_CM_EVENT_ROUTE_RESOLVED */
390 case RDMA_CM_EVENT_ROUTE_RESOLVED:
391 /* after RDMA_CM_EVENT_ADDR_RESOLVED: */
/* the id's context is the ibw_conn passed to rdma_create_id() in ibw_connect */
392 assert(cma_id->context!=NULL);
393 conn = talloc_get_type(cma_id->context, struct ibw_conn);
395 rc = ibw_manage_connect(conn, cma_id);
/* passive side: a peer asks to connect to the listening context */
401 case RDMA_CM_EVENT_CONNECT_REQUEST:
402 ctx->state = IBWS_CONNECT_REQUEST;
403 conn = ibw_conn_new(ctx);
404 pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
405 pconn->cm_id = cma_id; /* !!! event will be freed but id not */
406 cma_id->context = (void *)conn;
407 DEBUG(10, ("pconn->cm_id %p\n", pconn->cm_id));
/* let the upper layer decide; it accepts by calling ibw_accept() */
409 conn->state = IBWC_INIT;
410 pctx->connstate_func(ctx, conn);
412 /* continued at ibw_accept when invoked by the func above */
413 if (!pconn->is_accepted) {
415 DEBUG(10, ("pconn->cm_id %p wasn't accepted\n", pconn->cm_id));
417 if (ibw_setup_cq_qp(conn))
421 /* TODO: clarify whether if it's needed by upper layer: */
422 ctx->state = IBWS_READY;
423 pctx->connstate_func(ctx, NULL);
425 /* NOTE: more requests can arrive until RDMA_CM_EVENT_ESTABLISHED ! */
428 case RDMA_CM_EVENT_ESTABLISHED:
429 /* expected after ibw_accept and ibw_connect[not directly] */
/* NOTE(review): a pointer is cast to unsigned int for %u; this truncates where pointers are wider than int - consider %p */
430 DEBUG(0, ("ESTABLISHED (conn: %u)\n", (unsigned int)cma_id->context));
431 conn = talloc_get_type(cma_id->context, struct ibw_conn);
432 assert(conn!=NULL); /* important assumption */
434 /* client conn is up */
435 conn->state = IBWC_CONNECTED;
437 /* both ctx and conn have changed */
438 pctx->connstate_func(ctx, conn);
/* failure events: only the error text is recorded in the visible lines */
441 case RDMA_CM_EVENT_ADDR_ERROR:
442 case RDMA_CM_EVENT_ROUTE_ERROR:
443 case RDMA_CM_EVENT_CONNECT_ERROR:
444 case RDMA_CM_EVENT_UNREACHABLE:
445 case RDMA_CM_EVENT_REJECTED:
446 sprintf(ibw_lasterr, "cma event %d, error %d\n", event->event, event->status);
449 case RDMA_CM_EVENT_DISCONNECTED:
/* an id other than the context's own belongs to an individual connection */
450 if (cma_id!=pctx->cm_id) {
451 DEBUG(0, ("client DISCONNECT event\n"));
452 conn = talloc_get_type(cma_id->context, struct ibw_conn);
453 conn->state = IBWC_DISCONNECTED;
454 pctx->connstate_func(NULL, conn);
458 /* if we are the last... */
459 if (ctx->conn_list==NULL)
460 rdma_disconnect(pctx->cm_id);
462 DEBUG(0, ("server DISCONNECT event\n"));
463 ctx->state = IBWS_STOPPED; /* ??? TODO: try it... */
464 /* talloc_free(ctx) should be called within or after this func */
465 pctx->connstate_func(ctx, NULL);
469 case RDMA_CM_EVENT_DEVICE_REMOVAL:
470 sprintf(ibw_lasterr, "cma detected device removal!\n");
474 sprintf(ibw_lasterr, "unknown event %d\n", event->event);
/* every event obtained from rdma_get_cm_event() is acknowledged here */
478 if ((rc=rdma_ack_cm_event(event))) {
479 sprintf(ibw_lasterr, "rdma_ack_cm_event failed with %d\n", rc);
/* error tail: log the message, then report the failure to the upper layer */
485 DEBUG(0, ("cm event handler: %s", ibw_lasterr));
486 if (cma_id!=pctx->cm_id) {
487 conn = talloc_get_type(cma_id->context, struct ibw_conn);
489 conn->state = IBWC_ERROR;
490 pctx->connstate_func(NULL, conn);
492 ctx->state = IBWS_ERROR;
493 pctx->connstate_func(ctx, NULL);
/*
 * fd-event callback for a connection's completion channel.
 * ibw_setup_cq_qp registers it with the ibw_conn as private_data.
 *
 * Polls one work completion from the connection's CQ and handles it by
 * opcode. A send completion returns the work-request descriptor to
 * wr_list_avail. A receive completion hands the data to receive_func and
 * re-posts a receive buffer. The trailing lines log ibw_lasterr, mark the
 * connection IBWC_ERROR and notify the upper layer.
 * NOTE(review): the case labels for send/receive, the result checks and the
 * jumps to the error tail are not visible in this excerpt.
 */
497 static void ibw_event_handler_verbs(struct event_context *ev,
498 struct fd_event *fde, uint16_t flags, void *private_data)
500 struct ibw_conn *conn = talloc_get_type(private_data, struct ibw_conn);
501 struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
502 struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
/* fetch at most one completion */
507 rc = ibv_poll_cq(pconn->cq, 1, &wc);
509 sprintf(ibw_lasterr, "ibv_poll_cq error %d\n", rc);
513 sprintf(ibw_lasterr, "cq completion failed status %d\n",
523 DEBUG(10, ("send completion\n"));
524 assert(pconn->cm_id->qp->qp_num==wc.qp_num);
/* send wr_ids index wr_index[] directly, so they are below max_send_wr */
525 assert(wc.wr_id < pctx->opts.max_send_wr);
527 p = pconn->wr_index[wc.wr_id];
/* release the separately registered buffer used for an oversized message */
529 ibw_free_mr(&p->msg_large, &p->mr_large);
/* the descriptor becomes available for ibw_alloc_send_buf() again */
532 DLIST_REMOVE(pconn->wr_list_used, p);
533 DLIST_ADD(pconn->wr_list_avail, p);
537 case IBV_WC_RDMA_WRITE:
538 DEBUG(10, ("rdma write completion\n"));
541 case IBV_WC_RDMA_READ:
542 DEBUG(10, ("rdma read completion\n"));
549 DEBUG(10, ("recv completion\n"));
550 assert(pconn->cm_id->qp->qp_num==wc.qp_num);
/* NOTE(review): receives are posted with wr_id = max_send_wr + recv_index and recv_index can be 0, so this looks like it should be >= - confirm */
551 assert((int)wc.wr_id > pctx->opts.max_send_wr);
/* recover the receive slot number from the wr_id */
552 recv_index = (int)wc.wr_id - pctx->opts.max_send_wr;
553 assert(recv_index < pctx->opts.max_recv_wr);
554 assert(wc.byte_len <= pctx->opts.recv_bufsize);
556 /* TODO: take care of fragmented messages !!! */
557 pctx->receive_func(conn,
558 pconn->buf_recv + (recv_index * pctx->opts.recv_bufsize),
/* post a receive buffer again to replace the one just consumed */
560 if (ibw_refill_cq_recv(conn))
566 sprintf(ibw_lasterr, "unknown completion %d\n", wc.opcode);
/* error tail: log the message and report the connection as failed */
572 DEBUG(0, (ibw_lasterr));
573 conn->state = IBWC_ERROR;
574 pctx->connstate_func(NULL, conn);
/*
 * Fill *opts with defaults (max_send_wr 256, max_recv_wr 1024,
 * avg_send_size 1024, recv_bufsize 256) and then override them from the
 * nattr name/value pairs in attr. Recognised names are "max_send_wr",
 * "max_recv_wr", "avg_send_size" and "recv_bufsize"; any other name is
 * reported in ibw_lasterr.
 * NOTE(review): values are parsed with atoi(), which cannot report malformed
 * input. The unknown-name message copies the caller-supplied name into the
 * fixed-size buffer with an unbounded sprintf().
 * NOTE(review): the assignment of `name` and the return statements are not
 * visible in this excerpt; ibw_init treats a non-zero result as failure.
 */
577 static int ibw_process_init_attrs(struct ibw_initattr *attr, int nattr, struct ibw_opts *opts)
580 const char *name, *value;
/* defaults, used for every option the caller does not override */
582 opts->max_send_wr = 256;
583 opts->max_recv_wr = 1024;
584 opts->avg_send_size = 1024;
585 opts->recv_bufsize = 256;
587 for(i=0; i<nattr; i++) {
589 value = attr[i].value;
591 assert(name!=NULL && value!=NULL);
592 if (strcmp(name, "max_send_wr")==0)
593 opts->max_send_wr = atoi(value);
594 else if (strcmp(name, "max_recv_wr")==0)
595 opts->max_recv_wr = atoi(value);
596 else if (strcmp(name, "avg_send_size")==0)
597 opts->avg_send_size = atoi(value);
598 else if (strcmp(name, "recv_bufsize")==0)
599 opts->recv_bufsize = atoi(value);
601 sprintf(ibw_lasterr, "ibw_init: unknown name %s\n", name);
/*
 * Create and initialise a wrapper context.
 *
 * Allocates the ibw_ctx as a top-level talloc object with its private part
 * as a child, and stores the two upper-layer callbacks. It then parses the
 * attribute list into pctx->opts and creates the rdma_cm event channel, the
 * fd event that drives ibw_event_handler_cm, the context's own cm_id, and
 * the protection domain. The system page size is cached for later aligned
 * allocations.
 *
 * attr, nattr    option name/value pairs (see ibw_process_init_attrs)
 * ibw_connstate  callback for context/connection state changes
 * ibw_receive    callback for received data
 * ectx           event context; NOTE(review): the line storing it into
 *                pctx->ectx is not visible in this excerpt
 * NOTE(review): one parameter line, the result checks and the return
 * statements are not visible in this excerpt.
 */
608 struct ibw_ctx *ibw_init(struct ibw_initattr *attr, int nattr,
610 ibw_connstate_fn_t ibw_connstate,
611 ibw_receive_fn_t ibw_receive,
612 struct event_context *ectx)
614 struct ibw_ctx *ctx = talloc_zero(NULL, struct ibw_ctx);
615 struct ibw_ctx_priv *pctx;
618 /* initialize basic data structures */
619 memset(ibw_lasterr, 0, IBW_LASTERR_BUFSIZE);
/* the buffer was zeroed by the memset above, so this store changes nothing */
622 ibw_lasterr[0] = '\0';
623 talloc_set_destructor(ctx, ibw_ctx_destruct);
624 ctx->ctx_userdata = ctx_userdata;
/* the private part is a talloc child of ctx */
626 pctx = talloc_zero(ctx, struct ibw_ctx_priv);
627 talloc_set_destructor(pctx, ibw_ctx_priv_destruct);
628 ctx->internal = (void *)pctx;
631 pctx->connstate_func = ibw_connstate;
632 pctx->receive_func = ibw_receive;
636 /* process attributes */
637 if (ibw_process_init_attrs(attr, nattr, &pctx->opts))
/* channel on which connection-manager events are delivered */
641 pctx->cm_channel = rdma_create_event_channel();
642 if (!pctx->cm_channel) {
643 sprintf(ibw_lasterr, "rdma_create_event_channel error %d\n", errno);
/* watch the channel's fd; the handler receives ctx as private data */
647 pctx->cm_channel_event = event_add_fd(pctx->ectx, pctx,
648 pctx->cm_channel->fd, EVENT_FD_READ, ibw_event_handler_cm, ctx);
/* the context's own id, later used by ibw_bind() and ibw_listen() */
650 rc = rdma_create_id(pctx->cm_channel, &pctx->cm_id, ctx, RDMA_PS_TCP);
653 sprintf(ibw_lasterr, "rdma_create_id error %d\n", rc);
656 DEBUG(10, ("created cm_id %p\n", pctx->cm_id));
/* protection domain used for all memory regions and queue pairs */
659 pctx->pd = ibv_alloc_pd(pctx->cm_id->verbs);
661 sprintf(ibw_lasterr, "ibv_alloc_pd failed %d\n", errno);
664 DEBUG(10, ("created pd %p\n", pctx->pd));
/* alignment later passed to memalign() in ibw_alloc_mr() */
666 pctx->pagesize = sysconf(_SC_PAGESIZE);
669 /* don't put code here */
671 DEBUG(0, (ibw_lasterr));
679 int ibw_stop(struct ibw_ctx *ctx)
683 for(p=ctx->conn_list; p!=NULL; p=p->next) {
684 if (ctx->state==IBWC_ERROR || ctx->state==IBWC_CONNECTED) {
685 if (ibw_disconnect(p))
/*
 * Bind the context's cm_id to the local address my_addr using
 * rdma_bind_addr(). On failure the error text is stored in ibw_lasterr and
 * logged.
 * NOTE(review): the check on rc and the return statements are not visible
 * in this excerpt.
 */
693 int ibw_bind(struct ibw_ctx *ctx, struct sockaddr_in *my_addr)
/* plain cast here; the other functions in this file use talloc_get_type() for the same lookup */
695 struct ibw_ctx_priv *pctx = (struct ibw_ctx_priv *)ctx->internal;
698 rc = rdma_bind_addr(pctx->cm_id, (struct sockaddr *) my_addr);
700 sprintf(ibw_lasterr, "rdma_bind_addr error %d\n", rc);
701 DEBUG(0, (ibw_lasterr));
704 DEBUG(10, ("rdma_bind_addr successful\n"));
/*
 * Start listening for connection requests on the context's cm_id, passing
 * backlog through to rdma_listen(). On failure the error text is stored in
 * ibw_lasterr and logged.
 * NOTE(review): the check on rc and the return statements are not visible
 * in this excerpt.
 */
709 int ibw_listen(struct ibw_ctx *ctx, int backlog)
711 struct ibw_ctx_priv *pctx = talloc_get_type(ctx->internal, struct ibw_ctx_priv);
714 DEBUG(10, ("rdma_listen...\n"));
715 rc = rdma_listen(pctx->cm_id, backlog);
717 sprintf(ibw_lasterr, "rdma_listen failed: %d\n", rc);
718 DEBUG(0, (ibw_lasterr));
/*
 * Accept a pending connection request.
 *
 * Stores conn_userdata on the connection, calls rdma_accept() on the
 * connection's cm_id, and sets pconn->is_accepted. The CONNECT_REQUEST case
 * of the CM event handler tests that flag after its connstate_func callback
 * returns.
 * ctx is not used by any of the visible lines.
 * NOTE(review): the check on rc and the return statements are not visible
 * in this excerpt.
 */
725 int ibw_accept(struct ibw_ctx *ctx, struct ibw_conn *conn, void *conn_userdata)
727 struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
728 struct rdma_conn_param conn_param;
731 conn->conn_userdata = conn_userdata;
/* same resource settings as the active side uses in ibw_manage_connect() */
733 memset(&conn_param, 0, sizeof(struct rdma_conn_param));
734 conn_param.responder_resources = 1;
735 conn_param.initiator_depth = 1;
736 rc = rdma_accept(pconn->cm_id, &conn_param);
738 sprintf(ibw_lasterr, "rdma_accept failed %d\n", rc);
739 DEBUG(0, (ibw_lasterr));
743 pconn->is_accepted = 1;
745 /* continued at RDMA_CM_EVENT_ESTABLISHED */
750 int ibw_connect(struct ibw_ctx *ctx, struct sockaddr_in *serv_addr, void *conn_userdata)
752 struct ibw_ctx_priv *pctx = talloc_get_type(ctx->internal, struct ibw_ctx_priv);
753 struct ibw_conn *conn = NULL;
754 struct ibw_conn_priv *pconn = NULL;
757 conn = ibw_conn_new(ctx);
758 conn->conn_userdata = conn_userdata;
759 pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
761 rc = rdma_create_id(pctx->cm_channel, &pconn->cm_id, conn, RDMA_PS_TCP);
764 sprintf(ibw_lasterr, "rdma_create_id error %d\n", rc);
768 rc = rdma_resolve_addr(pconn->cm_id, NULL, (struct sockaddr *) &serv_addr, 2000);
770 sprintf(ibw_lasterr, "rdma_resolve_addr error %d\n", rc);
771 DEBUG(0, (ibw_lasterr));
775 /* continued at RDMA_CM_EVENT_ADDR_RESOLVED */
780 int ibw_disconnect(struct ibw_conn *conn)
783 struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
785 rc = rdma_disconnect(pctx->cm_id);
787 sprintf(ibw_lasterr, "ibw_disconnect failed with %d", rc);
788 DEBUG(0, (ibw_lasterr));
792 /* continued at RDMA_CM_EVENT_DISCONNECTED */
/*
 * Reserve a send work-request descriptor and hand out a payload buffer for
 * an n-byte message.
 *
 * Takes the head of wr_list_avail and moves it to wr_list_used. If the
 * payload plus a sizeof(long) header fits into avg_send_size, *buf points
 * into the descriptor's slice of the pre-registered send buffer. Otherwise
 * a dedicated buffer of n + sizeof(long) bytes is allocated and registered,
 * and *buf points into that. In both cases *buf starts sizeof(long) bytes
 * past the start of the underlying buffer.
 * NOTE(review): the descriptor is moved to wr_list_used before the large
 * allocation is attempted; whether it is moved back when that allocation
 * fails cannot be seen here - confirm.
 * NOTE(review): the assignment to *key, the failure checks and the return
 * statements are not visible in this excerpt; ibw_send() expects key to be
 * the struct ibw_wr.
 */
797 int ibw_alloc_send_buf(struct ibw_conn *conn, void **buf, void **key, int n)
799 struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
800 struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
/* candidate descriptor: head of the free list */
801 struct ibw_wr *p = pconn->wr_list_avail;
804 sprintf(ibw_lasterr, "insufficient wr chunks\n");
808 DLIST_REMOVE(pconn->wr_list_avail, p);
809 DLIST_ADD(pconn->wr_list_used, p);
/* small message: use the descriptor's slice of the shared send buffer */
811 if (n + sizeof(long) <= pctx->opts.avg_send_size) {
812 *buf = (void *)(p->msg + sizeof(long));
/* large message: register a dedicated buffer for this request */
815 p->msg_large = ibw_alloc_mr(pctx, pconn, n + sizeof(long), &p->mr_large);
817 sprintf(ibw_lasterr, "ibw_alloc_send_buf alloc error\n");
818 DEBUG(0, (ibw_lasterr));
821 *buf = (void *)(p->msg_large + sizeof(long));
/*
 * Post a send for a buffer previously obtained from ibw_alloc_send_buf().
 *
 * key is the struct ibw_wr handed out with the buffer. Using the same size
 * test as ibw_alloc_send_buf(), the function sends either the descriptor's
 * slice of the shared send buffer or its dedicated large buffer. It writes
 * n in network byte order at the start of that buffer and posts a signaled
 * IBV_WR_SEND on the connection's queue pair.
 * Returns the result of ibv_post_send().
 * NOTE(review): a sizeof(long) header is reserved but the length is stored
 * as a 32-bit value - confirm the receiver's expectation.
 * NOTE(review): the remaining initialisers of `list` and `wr` are not
 * visible in this excerpt.
 */
827 int ibw_send(struct ibw_conn *conn, void *buf, void *key, int n)
829 struct ibw_ctx_priv *pctx = talloc_get_type(conn->ctx->internal, struct ibw_ctx_priv);
830 struct ibw_conn_priv *pconn = talloc_get_type(conn->internal, struct ibw_conn_priv);
831 struct ibw_wr *p = talloc_get_type(key, struct ibw_wr);
/* scatter/gather entry; address and lkey are chosen below */
832 struct ibv_sge list = {
833 .addr = (uintptr_t) NULL,
837 struct ibv_send_wr wr = {
841 .opcode = IBV_WR_SEND,
842 .send_flags = IBV_SEND_SIGNALED,
844 struct ibv_send_wr *bad_wr;
/* small message: it lives in the shared, pre-registered send buffer */
846 if (n + sizeof(long)<=pctx->opts.avg_send_size) {
/* buf must be the pointer ibw_alloc_send_buf() returned for this descriptor */
847 assert((p->msg + sizeof(long))==(char *)buf);
848 list.lkey = pconn->mr_send->lkey;
849 list.addr = (uintptr_t) p->msg;
/* length prefix in network byte order */
851 *((uint32_t *)p->msg) = htonl(n);
/* large message: it lives in the descriptor's dedicated registered buffer */
853 assert((p->msg_large + sizeof(long))==(char *)buf);
854 assert(p->mr_large!=NULL);
855 list.lkey = p->mr_large->lkey;
856 list.addr = (uintptr_t) p->msg_large;
858 *((uint32_t *)p->msg_large) = htonl(n);
861 return ibv_post_send(pconn->cm_id->qp, &wr, &bad_wr);
864 const char *ibw_getLastError(void)