--- /dev/null
+/*
+ Unix SMB/CIFS implementation.
+ Infrastructure for SMB-Direct RDMA as transport
+ Copyright (C) Stefan Metzmacher 2012
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "system/network.h"
+#include <tevent.h>
+#include "../util/tevent_ntstatus.h"
+#include "../lib/tsocket/tsocket.h"
+#include "lib/util/util_net.h"
+#include "libcli/smb/smb_transport.h"
+#include "libcli/smb/smb_common.h"
+
+#ifdef SMB_TRANSPORT_ENABLE_RDMA
+#include <rdma/rdma_cma_abi.h>
+#include <rdma/rdma_cma.h>
+#include <infiniband/verbs.h>
+
+struct smb_transport_direct { /* private per-connection state of the SMB-Direct transport */
+ struct { /* rdma_cm (connection manager) side */
+ struct rdma_cm_id *cm_id; /* connection identifier, destroyed with rdma_destroy_id() */
+ struct rdma_event_channel *cm_channel; /* channel that delivers the cm events */
+ struct tevent_fd *fde_channel; /* tevent watcher on cm_channel->fd */
+ enum rdma_cm_event_type expected_event; /* event the connect sequence waits for next */
+ struct rdma_cm_event *cm_event; /* event fetched last, reset to NULL once acked */
+ } rdma;
+ struct { /* verbs (libibverbs) side */
+ struct ibv_pd *pd; /* protection domain used for the QP and the memory regions */
+ struct ibv_comp_channel *comp_channel; /* completion channel shared by both CQs */
+ struct tevent_fd *fde_channel; /* tevent watcher on comp_channel->fd */
+ struct ibv_cq *send_cq;
+ struct ibv_cq *recv_cq;
+ struct ibv_qp *qp; /* copy of cm_id->qp, set after rdma_create_qp() */
+ } ibv;
+ struct { /* connection parameters; the sizes are clamped by the negotiate response */
+ uint32_t max_send_size;
+ uint32_t max_receive_size;
+ uint32_t max_fragmented_size;
+ uint32_t max_read_write_size;
+ uint16_t send_credit_target; /* sent to the peer in the negotiate request */
+ uint16_t send_credits; /* credits granted by the peer */
+ uint16_t receive_credit_max;
+ uint16_t receive_credit_target; /* credits the peer requested */
+ uint16_t receive_credits;
+ uint32_t keep_alive_internal; /* NOTE(review): presumably meant "interval" - confirm */
+ } state;
+ struct { /* receive side buffers and the receive work request */
+ uint8_t hdr[0x18]; /* first scatter element of an incoming message */
+ uint8_t *reassembly_buffer; /* talloc'ed with max_receive_size - 0x14 bytes */
+ uint32_t reassembly_remaining;
+ uint8_t *full_buffer;
+ struct ibv_mr *hdr_mr; /* memory region covering hdr */
+ struct ibv_mr *cur_mr; /* memory region covering reassembly_buffer */
+ struct ibv_sge sge[2]; /* [0] = hdr, [1] = reassembly_buffer + 4 */
+ struct ibv_recv_wr wr; /* receive request posted with ibv_post_recv() */
+ } inbuf;
+
+ struct { /* request bookkeeping used when the connection breaks */
+ struct tevent_context *last_ev; /* context the failure callbacks are deferred to */
+ struct tevent_req *read_pdu_req; /* pending read request, if any */
+ struct tevent_req *write_pdu_req; /* pending write request, if any */
+ } reqs;
+};
+
+static int smb_transport_direct_destructor(struct smb_transport_direct *t);
+
+/*
+ * Tear down everything the transport holds: the tevent watchers, the
+ * pending pdu requests, the registered memory regions, an un-acked cm
+ * event and finally the verbs and rdma_cm objects.
+ *
+ * Every handle is reset to NULL once it has been released, so calling
+ * this function again on the same transport is harmless.
+ * Always returns 0.
+ */
+static int smb_transport_direct_destructor(struct smb_transport_direct *t)
+{
+	struct tevent_req **pending[] = {
+		&t->reqs.read_pdu_req,
+		&t->reqs.write_pdu_req,
+	};
+	struct ibv_mr **regions[] = {
+		&t->inbuf.hdr_mr,
+		&t->inbuf.cur_mr,
+	};
+	struct ibv_cq **queues[] = {
+		&t->ibv.send_cq,
+		&t->ibv.recv_cq,
+	};
+	size_t i;
+
+	/* stop event delivery first */
+	TALLOC_FREE(t->ibv.fde_channel);
+	TALLOC_FREE(t->rdma.fde_channel);
+
+	for (i = 0; i < ARRAY_SIZE(pending); i++) {
+		if (*pending[i] == NULL) {
+			continue;
+		}
+		tevent_req_received(*pending[i]);
+		*pending[i] = NULL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(regions); i++) {
+		if (*regions[i] == NULL) {
+			continue;
+		}
+		ibv_dereg_mr(*regions[i]);
+		*regions[i] = NULL;
+	}
+
+	if (t->rdma.cm_event != NULL) {
+		rdma_ack_cm_event(t->rdma.cm_event);
+		t->rdma.cm_event = NULL;
+	}
+
+	/* the queue pair goes before the completion queues it uses */
+	if (t->ibv.qp != NULL) {
+		ibv_destroy_qp(t->ibv.qp);
+		t->ibv.qp = NULL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(queues); i++) {
+		if (*queues[i] == NULL) {
+			continue;
+		}
+		ibv_destroy_cq(*queues[i]);
+		*queues[i] = NULL;
+	}
+
+	if (t->ibv.comp_channel != NULL) {
+		ibv_destroy_comp_channel(t->ibv.comp_channel);
+		t->ibv.comp_channel = NULL;
+	}
+
+	if (t->ibv.pd != NULL) {
+		ibv_dealloc_pd(t->ibv.pd);
+		t->ibv.pd = NULL;
+	}
+
+	/* the cm id is destroyed before the event channel */
+	if (t->rdma.cm_id != NULL) {
+		rdma_destroy_id(t->rdma.cm_id);
+		t->rdma.cm_id = NULL;
+	}
+
+	if (t->rdma.cm_channel != NULL) {
+		rdma_destroy_event_channel(t->rdma.cm_channel);
+		t->rdma.cm_channel = NULL;
+	}
+
+	return 0;
+}
+
+struct smb_transport_direct_connect_rdma_state { /* tevent_req state of the RDMA connect request */
+ struct smb_transport_direct *t; /* transport being connected, taken from the smb_transport given to _send() */
+};
+
+static void smb_transport_direct_connect_rdma_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags,
+ void *private_data);
+
+/*
+ * Start the asynchronous RDMA connection setup towards _addr.
+ *
+ * The destination port is always forced to 5445, whatever port the
+ * caller supplied. The cm event channel is watched with
+ * smb_transport_direct_connect_rdma_handler(), which carries out the
+ * remaining steps once the address has been resolved.
+ *
+ * local_addr and remote_addr are currently not used.
+ *
+ * Returns NULL if the request could not be allocated, otherwise a
+ * request to be finished with smb_transport_direct_connect_rdma_recv().
+ */
+struct tevent_req *smb_transport_direct_connect_rdma_send(TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct smb_transport *transport,
+					const struct sockaddr_storage *_addr,
+					struct tsocket_address *local_addr,
+					struct tsocket_address *remote_addr)
+{
+	struct smb_transport_direct *direct =
+		smb_transport_data(transport,
+				   struct smb_transport_direct);
+	struct smb_transport_direct_connect_rdma_state *state = NULL;
+	struct tevent_req *req = NULL;
+	/* private copy, the caller's address must not be modified */
+	struct sockaddr_storage peer = *_addr;
+	NTSTATUS status;
+	int rc;
+
+	set_sockaddr_port((struct sockaddr *)&peer, 5445);
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct smb_transport_direct_connect_rdma_state);
+	if (req == NULL) {
+		return NULL;
+	}
+	state->t = direct;
+
+	direct->rdma.fde_channel = tevent_add_fd(ev, direct,
+					direct->rdma.cm_channel->fd,
+					TEVENT_FD_READ,
+					smb_transport_direct_connect_rdma_handler,
+					req);
+	if (tevent_req_nomem(direct->rdma.fde_channel, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	/* no source address; the last argument is the timeout (5000) */
+	errno = 0;
+	rc = rdma_resolve_addr(direct->rdma.cm_id,
+			       NULL, (struct sockaddr *)&peer,
+			       5000);
+	if (rc == 0) {
+		direct->rdma.expected_event = RDMA_CM_EVENT_ADDR_RESOLVED;
+		return req;
+	}
+
+	status = map_nt_error_from_unix_common(errno);
+	DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+		__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+	smb_transport_direct_destructor(direct);
+	tevent_req_nterror(req, status);
+	return tevent_req_post(req, ev);
+}
+/*
+ * tevent fd handler that drives the client side RDMA connection setup.
+ *
+ * Runs when the rdma_cm event channel is readable: fetches one cm event
+ * and performs the step that follows it (resolve the route, create the
+ * verbs objects and connect, or complete the request). On any failure
+ * the transport is torn down with smb_transport_direct_destructor() and
+ * the tevent_req passed as private_data finishes with an NTSTATUS error.
+ */
+static void smb_transport_direct_connect_rdma_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags,
+ void *private_data)
+{
+ struct tevent_req *req =
+ talloc_get_type_abort(private_data,
+ struct tevent_req);
+ struct smb_transport_direct_connect_rdma_state *state =
+ tevent_req_data(req,
+ struct smb_transport_direct_connect_rdma_state);
+ struct ibv_qp_init_attr init_attr;
+ struct rdma_conn_param conn_param;
+ uint8_t ird_ord_hdr[8];
+ NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
+ int ret;
+
+ errno = 0;
+ /* the event is stored in the transport, so the destructor can ack it on error paths */
+ ret = rdma_get_cm_event(state->t->rdma.cm_channel,
+ &state->t->rdma.cm_event);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ /* a delivered event can itself report a failure; its status is treated as an errno value */
+ errno = 0;
+ if (state->t->rdma.cm_event->status != 0) {
+ errno = state->t->rdma.cm_event->status;
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ /* NOTE(review): a mismatch is only logged; the switch below still acts on the event */
+ if (state->t->rdma.cm_event->event != state->t->rdma.expected_event) {
+ DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+ __location__, __FUNCTION__, ret, errno));
+
+ }
+
+ switch (state->t->rdma.cm_event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED: /* address known: resolve the route next */
+ errno = 0;
+ ret = rdma_resolve_route(state->t->rdma.cm_id, 5000);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ state->t->rdma.expected_event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ break;
+ case RDMA_CM_EVENT_ROUTE_RESOLVED: /* route known: create pd, completion channel, CQs and QP, then connect */
+ errno = 0;
+ ret = 0;
+ state->t->ibv.pd = ibv_alloc_pd(state->t->rdma.cm_id->verbs);
+ if (state->t->ibv.pd == NULL) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ /* one completion channel serves both completion queues */
+ state->t->ibv.comp_channel = ibv_create_comp_channel(state->t->rdma.cm_id->verbs);
+ if (state->t->ibv.comp_channel == NULL) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ /* the channel fd is made non-blocking and close-on-exec */
+ set_blocking(state->t->ibv.comp_channel->fd, false);
+ smb_set_close_on_exec(state->t->ibv.comp_channel->fd);
+ /* queue pair limits: 2 work requests and 2 scatter/gather entries per direction */
+ ZERO_STRUCT(init_attr);
+ init_attr.cap.max_send_wr = 2;
+ init_attr.cap.max_recv_wr = 2;
+ init_attr.cap.max_recv_sge = 2;
+ init_attr.cap.max_send_sge = 2;
+ init_attr.qp_type = IBV_QPT_RC;
+ init_attr.sq_sig_all = 1;
+ /* the transport pointer is the cq_context; the ibv handlers compare against it */
+ state->t->ibv.send_cq = ibv_create_cq(state->t->rdma.cm_id->verbs,
+ init_attr.cap.max_send_wr,
+ state->t,
+ state->t->ibv.comp_channel,
+ 0);
+ if (state->t->ibv.send_cq == NULL) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ init_attr.send_cq = state->t->ibv.send_cq;
+ state->t->ibv.recv_cq = ibv_create_cq(state->t->rdma.cm_id->verbs,
+ init_attr.cap.max_recv_wr,
+ state->t,
+ state->t->ibv.comp_channel,
+ 0);
+ if (state->t->ibv.recv_cq == NULL) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ init_attr.recv_cq = state->t->ibv.recv_cq;
+ /* request a completion channel event for both CQs */
+ errno = 0;
+ ret = ibv_req_notify_cq(state->t->ibv.send_cq, 0);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+
+ errno = 0;
+ ret = ibv_req_notify_cq(state->t->ibv.recv_cq, 0);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+
+ errno = 0;
+ ret = rdma_create_qp(state->t->rdma.cm_id, state->t->ibv.pd,
+ &init_attr);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ state->t->ibv.qp = state->t->rdma.cm_id->qp;
+ /* 8 bytes of connect private data: 16 and 0; NOTE(review): presumably IRD/ORD values - confirm */
+ RSIVAL(ird_ord_hdr, 0, 16);
+ RSIVAL(ird_ord_hdr, 4, 0);
+
+ ZERO_STRUCT(conn_param);
+ conn_param.private_data = ird_ord_hdr;
+ conn_param.private_data_len = sizeof(ird_ord_hdr);
+ conn_param.responder_resources = 1;
+ conn_param.initiator_depth = 1;
+ conn_param.retry_count = 10;
+
+ errno = 0;
+ ret = rdma_connect(state->t->rdma.cm_id, &conn_param);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ state->t->rdma.expected_event = RDMA_CM_EVENT_ESTABLISHED;
+ break;
+
+ case RDMA_CM_EVENT_ESTABLISHED: /* connected: stop watching, ack the event, finish the request */
+ errno = 0;
+ ret = 0;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+ __location__, __FUNCTION__, ret, errno));
+
+ state->t->rdma.expected_event = RDMA_CM_EVENT_DISCONNECTED;
+ TALLOC_FREE(state->t->rdma.fde_channel);
+ rdma_ack_cm_event(state->t->rdma.cm_event);
+ state->t->rdma.cm_event = NULL;
+ tevent_req_done(req);
+ return;
+ /* every other listed event aborts the connect */
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ case RDMA_CM_EVENT_CONNECT_REQUEST:
+ case RDMA_CM_EVENT_CONNECT_RESPONSE:
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ case RDMA_CM_EVENT_UNREACHABLE:
+ case RDMA_CM_EVENT_REJECTED:
+ case RDMA_CM_EVENT_DISCONNECTED:
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ case RDMA_CM_EVENT_MULTICAST_JOIN:
+ case RDMA_CM_EVENT_MULTICAST_ERROR:
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: event[%d] ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__,
+ state->t->rdma.cm_event->event, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ /* intermediate step finished (or event value not listed above): ack and wait for the next event */
+ rdma_ack_cm_event(state->t->rdma.cm_event);
+ state->t->rdma.cm_event = NULL;
+}
+
+/*
+ * Collect the result of smb_transport_direct_connect_rdma_send().
+ *
+ * Removes the watcher on the cm event channel, consumes the request and
+ * returns the error it finished with, or NT_STATUS_OK on success.
+ */
+NTSTATUS smb_transport_direct_connect_rdma_recv(struct tevent_req *req)
+{
+	struct smb_transport_direct_connect_rdma_state *state =
+		tevent_req_data(req,
+		struct smb_transport_direct_connect_rdma_state);
+	NTSTATUS result = NT_STATUS_OK;
+
+	/* the state is used here, before tevent_req_received() is called */
+	TALLOC_FREE(state->t->rdma.fde_channel);
+
+	if (!tevent_req_is_nterror(req, &result)) {
+		result = NT_STATUS_OK;
+	}
+
+	tevent_req_received(req);
+	return result;
+}
+
+struct smb_transport_direct_connect_negotiate_state { /* tevent_req state of the negotiate exchange */
+ struct smb_transport_direct *t; /* transport being negotiated */
+ struct { /* RDMA read work request that is chained in front of the negotiate request */
+ struct ibv_sge sge[1];
+ struct ibv_send_wr wr;
+ } rdma_read;
+ struct { /* outgoing negotiate request */
+ uint8_t buffer[0x14]; /* request payload */
+ struct ibv_mr *mr; /* memory region covering buffer */
+ struct ibv_sge sge[1];
+ struct ibv_send_wr wr;
+ } req;
+ struct { /* incoming negotiate response */
+ uint8_t buffer[512];//0x20];
+ struct ibv_mr *mr; /* memory region covering buffer */
+ struct ibv_sge sge[1];
+ struct ibv_recv_wr wr;
+ } rep;
+};
+
+/*
+ * talloc destructor of the negotiate request state.
+ *
+ * Removes both tevent watchers (their private data is the request that
+ * is going away) and deregisters the memory regions covering the
+ * request and the response buffer. Both buffers live inside the state,
+ * so neither region may stay registered once the state is freed.
+ *
+ * Always returns 0, which lets talloc free the state.
+ */
+static int smb_transport_direct_connect_negotiate_destructor(
+	struct smb_transport_direct_connect_negotiate_state *state)
+{
+	TALLOC_FREE(state->t->ibv.fde_channel);
+	TALLOC_FREE(state->t->rdma.fde_channel);
+
+	if (state->req.mr != NULL) {
+		ibv_dereg_mr(state->req.mr);
+		state->req.mr = NULL;
+	}
+
+	/* registered in _negotiate_send() as well, so release it here too */
+	if (state->rep.mr != NULL) {
+		ibv_dereg_mr(state->rep.mr);
+		state->rep.mr = NULL;
+	}
+
+	return 0;
+}
+
+static void smb_transport_direct_connect_negotiate_rdma_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags,
+ void *private_data);
+static void smb_transport_direct_connect_negotiate_ibv_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags,
+ void *private_data);
+
+/*
+ * Start the negotiate exchange on an established RDMA connection.
+ *
+ * Watches the completion channel and the cm event channel, posts a
+ * receive for the response and then posts an RDMA read work request
+ * chained with the send of the 0x14 byte negotiate request.
+ *
+ * Returns NULL if the request could not be allocated, otherwise a
+ * request to be finished with
+ * smb_transport_direct_connect_negotiate_recv().
+ */
+struct tevent_req *smb_transport_direct_connect_negotiate_send(TALLOC_CTX *mem_ctx,
+					struct tevent_context *ev,
+					struct smb_transport *transport)
+{
+	struct smb_transport_direct *direct =
+		smb_transport_data(transport,
+				   struct smb_transport_direct);
+	struct smb_transport_direct_connect_negotiate_state *state = NULL;
+	struct tevent_req *req = NULL;
+	struct ibv_send_wr *read_wr = NULL;
+	struct ibv_send_wr *send_wr = NULL;
+	struct ibv_recv_wr *recv_wr = NULL;
+	struct ibv_recv_wr *failed_recv = NULL;
+	struct ibv_send_wr *failed_send = NULL;
+	NTSTATUS status;
+	int rc;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct smb_transport_direct_connect_negotiate_state);
+	if (req == NULL) {
+		return NULL;
+	}
+	state->t = direct;
+	read_wr = &state->rdma_read.wr;
+	send_wr = &state->req.wr;
+	recv_wr = &state->rep.wr;
+
+	talloc_set_destructor(state, smb_transport_direct_connect_negotiate_destructor);
+
+	direct->ibv.fde_channel = tevent_add_fd(ev, direct,
+					direct->ibv.comp_channel->fd,
+					TEVENT_FD_READ,
+					smb_transport_direct_connect_negotiate_ibv_handler,
+					req);
+	if (tevent_req_nomem(direct->ibv.fde_channel, req)) {
+		return tevent_req_post(req, ev);
+	}
+	direct->rdma.fde_channel = tevent_add_fd(ev, direct,
+					direct->rdma.cm_channel->fd,
+					TEVENT_FD_READ,
+					smb_transport_direct_connect_negotiate_rdma_handler,
+					req);
+	if (tevent_req_nomem(direct->rdma.fde_channel, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	/* zero length RDMA read with placeholder address and keys */
+	state->rdma_read.sge[0].addr = 1;
+	state->rdma_read.sge[0].length = 0;
+	state->rdma_read.sge[0].lkey = 1;
+	read_wr->opcode = IBV_WR_RDMA_READ;
+	read_wr->send_flags = IBV_SEND_SIGNALED;
+	read_wr->sg_list = state->rdma_read.sge;
+	read_wr->num_sge = ARRAY_SIZE(state->rdma_read.sge);
+	read_wr->wr.rdma.rkey = 1;
+	read_wr->wr.rdma.remote_addr = 1;
+
+	/* negotiate request payload */
+	SSVAL(state->req.buffer, 0x00, 0x0100);
+	SSVAL(state->req.buffer, 0x02, 0x0100);
+	SSVAL(state->req.buffer, 0x04, 0x0000);
+	SSVAL(state->req.buffer, 0x06, direct->state.send_credit_target);
+	SIVAL(state->req.buffer, 0x08, direct->state.max_send_size);
+	SIVAL(state->req.buffer, 0x0C, direct->state.max_receive_size);
+	SIVAL(state->req.buffer, 0x10, direct->state.max_fragmented_size);
+
+	/* sizeof(state->req.buffer) is 0x14 */
+	state->req.mr = ibv_reg_mr(direct->ibv.pd,
+				   state->req.buffer,
+				   sizeof(state->req.buffer),
+				   0);
+	if (tevent_req_nomem(state->req.mr, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	state->req.sge[0].addr = (uint64_t) (uintptr_t) state->req.buffer;
+	state->req.sge[0].length = sizeof(state->req.buffer);
+	state->req.sge[0].lkey = state->req.mr->lkey;
+	send_wr->opcode = IBV_WR_SEND;
+	send_wr->send_flags = IBV_SEND_SIGNALED;
+	send_wr->sg_list = state->req.sge;
+	send_wr->num_sge = ARRAY_SIZE(state->req.sge);
+
+	state->rep.mr = ibv_reg_mr(direct->ibv.pd,
+				   state->rep.buffer,
+				   sizeof(state->rep.buffer),
+				   IBV_ACCESS_LOCAL_WRITE);
+	if (tevent_req_nomem(state->rep.mr, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	/* response buffer is pre-filled with 0x1F bytes */
+	memset(state->rep.buffer, 0x1F, sizeof(state->rep.buffer));
+	state->rep.sge[0].addr = (uint64_t) (uintptr_t) state->rep.buffer;
+	state->rep.sge[0].length = sizeof(state->rep.buffer);
+	state->rep.sge[0].lkey = state->rep.mr->lkey;
+	recv_wr->sg_list = state->rep.sge;
+	recv_wr->num_sge = ARRAY_SIZE(state->rep.sge);
+
+	/* the receive is posted before anything is sent */
+	errno = 0;
+	rc = ibv_post_recv(direct->ibv.qp, recv_wr, &failed_recv);
+	if (rc != 0) {
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto fail;
+	}
+
+	/* one post: the RDMA read followed by the negotiate request */
+	read_wr->next = send_wr;
+	errno = 0;
+	rc = ibv_post_send(direct->ibv.qp, read_wr, &failed_send);
+	if (rc != 0) {
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto fail;
+	}
+
+	DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+		__location__, __FUNCTION__, rc, errno));
+	return req;
+
+fail:
+	smb_transport_direct_destructor(direct);
+	tevent_req_nterror(req, status);
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * tevent fd handler for the cm event channel during negotiation.
+ *
+ * No cm event is welcome in this phase, so every path tears the
+ * transport down and fails the request (private_data); only the
+ * reported NTSTATUS depends on what was received.
+ */
+static void smb_transport_direct_connect_negotiate_rdma_handler(struct tevent_context *ev,
+					struct tevent_fd *fde,
+					uint16_t flags,
+					void *private_data)
+{
+	struct tevent_req *req =
+		talloc_get_type_abort(private_data,
+		struct tevent_req);
+	struct smb_transport_direct_connect_negotiate_state *state =
+		tevent_req_data(req,
+		struct smb_transport_direct_connect_negotiate_state);
+	struct smb_transport_direct *direct = state->t;
+	NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
+	int rc;
+
+	errno = 0;
+
+	/* stored in the transport, the destructor acks it */
+	rc = rdma_get_cm_event(direct->rdma.cm_channel,
+			       &direct->rdma.cm_event);
+	if (rc != 0) {
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto fail;
+	}
+
+	if (direct->rdma.cm_event->status != 0) {
+		errno = direct->rdma.cm_event->status;
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto fail;
+	}
+
+	switch (direct->rdma.cm_event->event) {
+	case RDMA_CM_EVENT_DISCONNECTED:
+		status = NT_STATUS_CONNECTION_DISCONNECTED;
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto fail;
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+	case RDMA_CM_EVENT_ADDR_ERROR:
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+	case RDMA_CM_EVENT_ESTABLISHED:
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+	case RDMA_CM_EVENT_CONNECT_REQUEST:
+	case RDMA_CM_EVENT_CONNECT_RESPONSE:
+	case RDMA_CM_EVENT_CONNECT_ERROR:
+	case RDMA_CM_EVENT_UNREACHABLE:
+	case RDMA_CM_EVENT_REJECTED:
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+	case RDMA_CM_EVENT_MULTICAST_JOIN:
+	case RDMA_CM_EVENT_MULTICAST_ERROR:
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+		status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto fail;
+	}
+
+	/* event value that is not listed above */
+	status = NT_STATUS_INTERNAL_ERROR;
+	DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+		__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+
+fail:
+	smb_transport_direct_destructor(direct);
+	tevent_req_nterror(req, status);
+}
+/*
+ * tevent fd handler for the completion channel during negotiation.
+ *
+ * Consumes one completion channel event and one work completion. Send
+ * and RDMA read completions are only logged. The receive completion
+ * carries the negotiate response: it is validated, the connection
+ * parameters in t->state are updated from it, the regular receive
+ * buffers are registered and posted, and the request (private_data)
+ * finishes successfully. Validation failures tear the transport down
+ * and fail the request.
+ */
+static void smb_transport_direct_connect_negotiate_ibv_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags,
+ void *private_data)
+{
+ struct tevent_req *req =
+ talloc_get_type_abort(private_data,
+ struct tevent_req);
+ struct smb_transport_direct_connect_negotiate_state *state =
+ tevent_req_data(req,
+ struct smb_transport_direct_connect_negotiate_state);
+ struct ibv_cq *cq = NULL;
+ void *cq_context = NULL;
+ NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
+ struct ibv_wc wc;
+ int ret;
+ uint16_t credits_requested;
+ uint16_t credits_granted;
+ uint32_t max_read_write_size;
+ uint32_t preferred_send_size;
+ uint32_t max_receive_size;
+ uint32_t max_fragmented_size;
+ uint32_t tmp;
+ uint8_t *ptr;
+ struct ibv_recv_wr *bad_recv_wr = NULL;
+ /* find out which completion queue raised the event */
+ errno = 0;
+ ret = ibv_get_cq_event(state->t->ibv.comp_channel,
+ &cq, &cq_context);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+
+ ibv_ack_cq_events(cq, 1);
+ /* both CQs were created with the transport as their context */
+ if (cq_context != state->t) {
+ status = NT_STATUS_INTERNAL_ERROR;;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ /* ask for the next notification before polling */
+ errno = 0;
+ ret = ibv_req_notify_cq(cq, 0);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ /* exactly one work completion is expected; anything else (including none) is a failure */
+ errno = 0;
+ ZERO_STRUCT(wc);
+ ret = ibv_poll_cq(cq, 1, &wc);
+ if (ret != 1) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ ret = 0;
+ /* NOTE(review): wc.status is an ibv_wc_status value, yet it is mapped like an errno below - confirm */
+ if (wc.status == IBV_WC_WR_FLUSH_ERR) {
+ //errno = wc.status;
+ status = map_nt_error_from_unix_common(wc.status);//errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ TALLOC_FREE(state->t->ibv.fde_channel); /* NOTE(review): presumably the very fde this handler runs for - confirm */
+ TALLOC_FREE(state->t->rdma.fde_channel);
+ smb_transport_direct_connect_negotiate_rdma_handler(ev, fde, flags, private_data); /* reads the cm event directly; that handler does not use its fde argument */
+ return;
+ }
+ if (wc.status != IBV_WC_SUCCESS) {
+ errno = wc.status;
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status),
+ ibv_wc_status_str(wc.status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+
+ switch (wc.opcode) {
+ case IBV_WC_SEND: /* negotiate request was sent: only logged */
+ DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+ __location__, __FUNCTION__, ret, errno));
+ break;
+ case IBV_WC_RDMA_READ: /* RDMA read finished: only logged */
+ DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+ __location__, __FUNCTION__, ret, errno));
+ break;
+ case IBV_WC_RECV: /* negotiate response arrived in state->rep.buffer */
+ DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+ __location__, __FUNCTION__, ret, errno));
+ dump_data(0, state->rep.buffer, wc.byte_len);
+ if (wc.byte_len < 0x20) { /* the fields read below end at offset 0x20 */
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ if (SVAL(state->rep.buffer, 0x00) != 0x0100) { /* the 16-bit fields at 0x00, 0x02 and 0x04 must all be 0x0100 */
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ if (SVAL(state->rep.buffer, 0x02) != 0x0100) {
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ if (SVAL(state->rep.buffer, 0x04) != 0x0100) {
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ credits_requested = SVAL(state->rep.buffer, 0x08); /* must not be 0 */
+ if (credits_requested == 0) {
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ credits_granted = SVAL(state->rep.buffer, 0x0A); /* must not be 0 */
+ if (credits_granted == 0) {
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ status = NT_STATUS(IVAL(state->rep.buffer, 0x0C)); /* status reported by the peer, passed on as is when not OK */
+ if (!NT_STATUS_IS_OK(status)) {
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ max_read_write_size = IVAL(state->rep.buffer, 0x10);
+ preferred_send_size = IVAL(state->rep.buffer, 0x14); /* must not exceed our max_receive_size */
+ if (preferred_send_size > state->t->state.max_receive_size) {
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ max_receive_size = IVAL(state->rep.buffer, 0x18); /* at least 0x80 */
+ if (max_receive_size < 0x80) {
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ max_fragmented_size = IVAL(state->rep.buffer, 0x1C); /* at least 0x20000 */
+ if (max_fragmented_size < 0x20000) {
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+ /* response accepted: merge the peer's values into the transport state */
+ state->t->state.receive_credit_target = credits_requested;
+ /* receive size: min(ours, peer's preferred send size), but at least 128 */
+ tmp = state->t->state.max_receive_size;
+ tmp = MIN(tmp, preferred_send_size);
+ tmp = MAX(tmp, 128);
+ state->t->state.max_receive_size = tmp;
+ /* send size: min(ours, peer's max receive size) */
+ tmp = state->t->state.max_send_size;
+ tmp = MIN(tmp, max_receive_size);
+ state->t->state.max_send_size = tmp;
+ /* read/write size is capped at 1048576 */
+ tmp = MIN(1048576, max_read_write_size);
+ state->t->state.max_read_write_size = tmp;
+
+ tmp = state->t->state.max_fragmented_size;
+ tmp = MIN(tmp, max_fragmented_size);
+ state->t->state.max_fragmented_size = tmp;
+
+ state->t->state.send_credits = credits_granted;
+ /* negotiation is over: stop watching both channels */
+ TALLOC_FREE(state->t->ibv.fde_channel);
+ TALLOC_FREE(state->t->rdma.fde_channel);
+
+ DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+ __location__, __FUNCTION__, ret, errno));
+ /* NOTE(review): the failures from here on do not call smb_transport_direct_destructor(), unlike the ones above */
+ state->t->inbuf.hdr_mr = ibv_reg_mr(state->t->ibv.pd,
+ state->t->inbuf.hdr,
+ sizeof(state->t->inbuf.hdr),
+ IBV_ACCESS_LOCAL_WRITE);
+ if (tevent_req_nomem(state->t->inbuf.hdr_mr, req)) {
+ return;
+ }
+ /* max_receive_size is at least 128 here (see the MAX above), so the subtraction cannot wrap */
+ ptr = talloc_array(state->t, uint8_t,
+ state->t->state.max_receive_size - 0x14);
+ if (tevent_req_nomem(ptr, req)) {
+ return;
+ }
+ state->t->inbuf.reassembly_buffer = ptr;
+
+ state->t->inbuf.cur_mr = ibv_reg_mr(state->t->ibv.pd,
+ ptr,
+ talloc_get_size(ptr),
+ IBV_ACCESS_LOCAL_WRITE);
+ if (tevent_req_nomem(state->t->inbuf.cur_mr, req)) {
+ return;
+ }
+ /* scatter list: hdr first, the rest lands 4 bytes into the reassembly buffer */
+ state->t->inbuf.sge[0].addr = (uint64_t) (uintptr_t) state->t->inbuf.hdr;
+ state->t->inbuf.sge[0].length = sizeof(state->t->inbuf.hdr);
+ state->t->inbuf.sge[0].lkey = state->t->inbuf.hdr_mr->lkey;
+ state->t->inbuf.sge[1].addr = (uint64_t) (uintptr_t) (ptr + 4);
+ state->t->inbuf.sge[1].length = talloc_get_size(ptr) - 4;
+ state->t->inbuf.sge[1].lkey = state->t->inbuf.cur_mr->lkey;
+ state->t->inbuf.wr.sg_list = state->t->inbuf.sge;
+ state->t->inbuf.wr.num_sge = ARRAY_SIZE(state->t->inbuf.sge);
+ /* post the first regular receive before completing the request */
+ errno = 0;
+ ret = ibv_post_recv(state->t->ibv.qp, &state->t->inbuf.wr, &bad_recv_wr);
+ if (ret != 0) {
+ status = map_nt_error_from_unix_common(errno);
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ tevent_req_nterror(req, status);
+ return;
+ }
+
+ tevent_req_done(req);
+ return;
+ /* any other completion type is treated as a protocol violation */
+ case IBV_WC_RDMA_WRITE:
+ default:
+ status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+ DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+ __location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+ smb_transport_direct_destructor(state->t);
+ tevent_req_nterror(req, status);
+ return;
+ }
+}
+
+/*
+ * Collect the result of smb_transport_direct_connect_negotiate_send().
+ *
+ * Consumes the request and returns the error it finished with, or
+ * NT_STATUS_OK on success.
+ */
+NTSTATUS smb_transport_direct_connect_negotiate_recv(struct tevent_req *req)
+{
+	NTSTATUS result = NT_STATUS_OK;
+
+	if (!tevent_req_is_nterror(req, &result)) {
+		result = NT_STATUS_OK;
+	}
+
+	tevent_req_received(req);
+	return result;
+}
+
+/*
+ * Fail the pending read and write pdu requests with status and tear
+ * the transport down.
+ *
+ * A success status is replaced by NT_STATUS_UNEXPECTED_NETWORK_ERROR,
+ * so the pending requests always finish with an error. Their callbacks
+ * are deferred to t->reqs.last_ev.
+ */
+static void smb_transport_direct_data_disconnect(struct smb_transport_direct *t,
+						 NTSTATUS status)
+{
+	struct tevent_req **pending[] = {
+		&t->reqs.read_pdu_req,
+		&t->reqs.write_pdu_req,
+	};
+	size_t i;
+
+	if (NT_STATUS_IS_OK(status)) {
+		status = NT_STATUS_UNEXPECTED_NETWORK_ERROR;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(pending); i++) {
+		struct tevent_req *subreq = *pending[i];
+
+		if (subreq == NULL) {
+			continue;
+		}
+
+		tevent_req_defer_callback(subreq, t->reqs.last_ev);
+		tevent_req_nterror(subreq, status);
+		*pending[i] = NULL;
+	}
+
+	smb_transport_direct_destructor(t);
+}
+
+/*
+ * tevent fd handler for the cm event channel while data is transferred.
+ *
+ * private_data is the transport itself. Every cm event in this phase
+ * ends the connection through smb_transport_direct_data_disconnect();
+ * only the NTSTATUS handed to it depends on what was received.
+ */
+static void smb_transport_direct_data_rdma_handler(struct tevent_context *ev,
+					struct tevent_fd *fde,
+					uint16_t flags,
+					void *private_data)
+{
+	struct smb_transport_direct *direct =
+		talloc_get_type_abort(private_data,
+		struct smb_transport_direct);
+	NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
+	int rc;
+
+	errno = 0;
+
+	/* stored in the transport, the destructor acks it */
+	rc = rdma_get_cm_event(direct->rdma.cm_channel,
+			       &direct->rdma.cm_event);
+	if (rc != 0) {
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto disconnect;
+	}
+
+	if (direct->rdma.cm_event->status != 0) {
+		errno = direct->rdma.cm_event->status;
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto disconnect;
+	}
+
+	switch (direct->rdma.cm_event->event) {
+	case RDMA_CM_EVENT_DISCONNECTED:
+		status = NT_STATUS_CONNECTION_DISCONNECTED;
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto disconnect;
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+	case RDMA_CM_EVENT_ADDR_ERROR:
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+	case RDMA_CM_EVENT_ESTABLISHED:
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+	case RDMA_CM_EVENT_CONNECT_REQUEST:
+	case RDMA_CM_EVENT_CONNECT_RESPONSE:
+	case RDMA_CM_EVENT_CONNECT_ERROR:
+	case RDMA_CM_EVENT_UNREACHABLE:
+	case RDMA_CM_EVENT_REJECTED:
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+	case RDMA_CM_EVENT_MULTICAST_JOIN:
+	case RDMA_CM_EVENT_MULTICAST_ERROR:
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+		status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+		goto disconnect;
+	}
+
+	/* event value that is not listed above */
+	status = NT_STATUS_INTERNAL_ERROR;
+	DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+		__location__, __FUNCTION__, rc, errno, nt_errstr(status)));
+
+disconnect:
+	smb_transport_direct_data_disconnect(direct, status);
+}
+
+/*
+ * tevent fd handler for the ibverbs completion channel while the
+ * connection is in data transfer mode.
+ *
+ * One completion queue event is fetched and acknowledged, the
+ * notification is re-armed and exactly one work completion is polled:
+ *
+ *  - IBV_WC_SEND completes the pending write_pdu request.
+ *  - IBV_WC_RECV parses the data transfer header found in
+ *    t->inbuf.hdr, updates the credit counters, hands the received
+ *    payload to the pending read_pdu request (if any) and posts the
+ *    receive work request again.
+ *  - A flushed work request drops both fd handlers and lets the
+ *    connection manager handler deal with the pending CM event.
+ *  - Everything else disconnects the transport.
+ *
+ * NOTE(review): only one completion is polled per channel event;
+ * whether further completions can be left behind in the queue should
+ * be checked against the ibv_req_notify_cq()/ibv_poll_cq() contract.
+ */
+static void smb_transport_direct_data_ibv_handler(struct tevent_context *ev,
+						  struct tevent_fd *fde,
+						  uint16_t fde_flags,
+						  void *private_data)
+{
+	struct smb_transport_direct *t =
+		talloc_get_type_abort(private_data,
+				      struct smb_transport_direct);
+	struct ibv_cq *cq = NULL;
+	void *cq_context = NULL;
+	NTSTATUS status = NT_STATUS_INTERNAL_ERROR;
+	struct ibv_wc wc;
+	int ret;
+	uint8_t *ptr;
+	uint16_t credits_requested;
+	uint16_t credits_granted;
+	uint16_t flags;
+	uint32_t remaining_length;
+	uint32_t data_offset;
+	uint32_t data_length;
+	struct ibv_recv_wr *bad_recv_wr = NULL;
+
+	errno = 0;
+	ret = ibv_get_cq_event(t->ibv.comp_channel,
+			       &cq, &cq_context);
+	if (ret != 0) {
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+		smb_transport_direct_data_disconnect(t, status);
+		return;
+	}
+
+	ibv_ack_cq_events(cq, 1);
+
+	/* the completion queue has to belong to this transport */
+	if (cq_context != t) {
+		status = NT_STATUS_INTERNAL_ERROR;
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+		smb_transport_direct_data_disconnect(t, status);
+		return;
+	}
+
+	/* re-arm the notification before polling */
+	errno = 0;
+	ret = ibv_req_notify_cq(cq, 0);
+	if (ret != 0) {
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+		smb_transport_direct_data_disconnect(t, status);
+		return;
+	}
+
+	errno = 0;
+	ZERO_STRUCT(wc);
+	ret = ibv_poll_cq(cq, 1, &wc);
+	if (ret != 1) {
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+		smb_transport_direct_data_disconnect(t, status);
+		return;
+	}
+	ret = 0;
+
+	/*
+	 * NOTE(review): in the two branches below wc.status (an
+	 * ibv_wc_status value) is fed through the errno mapping - confirm
+	 * that the resulting NTSTATUS is what callers should see.
+	 */
+	if (wc.status == IBV_WC_WR_FLUSH_ERR) {
+		errno = wc.status;
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
+			__location__, __FUNCTION__, ret, errno, nt_errstr(status),
+			ibv_wc_status_str(wc.status)));
+		/*
+		 * Drop both fd handlers and let the connection manager
+		 * handler fetch the pending CM event. It does not touch
+		 * the fde argument, which has just been released.
+		 */
+		TALLOC_FREE(t->ibv.fde_channel);
+		TALLOC_FREE(t->rdma.fde_channel);
+		smb_transport_direct_data_rdma_handler(ev, fde, 0 /* flags */, private_data);
+		return;
+	}
+	if (wc.status != IBV_WC_SUCCESS) {
+		errno = wc.status;
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s] ibv[%s]\n",
+			__location__, __FUNCTION__, ret, errno, nt_errstr(status),
+			ibv_wc_status_str(wc.status)));
+		smb_transport_direct_data_disconnect(t, status);
+		return;
+	}
+
+	switch (wc.opcode) {
+	case IBV_WC_SEND:
+		DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+			__location__, __FUNCTION__, ret, errno));
+		if (t->reqs.write_pdu_req == NULL) {
+			/* a send completion without a pending write */
+			status = NT_STATUS_INTERNAL_ERROR;
+			smb_transport_direct_data_disconnect(t, status);
+			return;
+		}
+		tevent_req_defer_callback(t->reqs.write_pdu_req,
+					  t->reqs.last_ev);
+		tevent_req_done(t->reqs.write_pdu_req);
+		return;
+
+	case IBV_WC_RECV:
+		DEBUG(0,("%s:%s: ret[%d] errno[%d]\n",
+			__location__, __FUNCTION__, ret, errno));
+		/* the fixed header fields parsed below end at 0x14 */
+		if (wc.byte_len < 0x14) {
+			status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+			DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+				__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+			smb_transport_direct_data_disconnect(t, status);
+			return;
+		}
+		dump_data(0, t->inbuf.hdr, MIN(wc.byte_len, sizeof(t->inbuf.hdr)));
+		credits_requested = SVAL(t->inbuf.hdr, 0x00);
+		if (credits_requested == 0) {
+			status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+			DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+				__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+			smb_transport_direct_data_disconnect(t, status);
+			return;
+		}
+		credits_granted = SVAL(t->inbuf.hdr, 0x02);
+		/*
+		 * NOTE(review): a data transfer message that grants no new
+		 * credits is rejected here - confirm against [MS-SMBD]
+		 * whether a value of 0 is really invalid.
+		 */
+		if (credits_granted == 0) {
+			status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+			DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+				__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+			smb_transport_direct_data_disconnect(t, status);
+			return;
+		}
+		flags = SVAL(t->inbuf.hdr, 0x04);
+		remaining_length = IVAL(t->inbuf.hdr, 0x08);
+		data_offset = IVAL(t->inbuf.hdr, 0x0C);
+		data_length = IVAL(t->inbuf.hdr, 0x10);
+
+		/* this message consumed one of the receive credits */
+		t->state.receive_credits -= 1;
+		t->state.receive_credit_target = credits_requested;
+		t->state.send_credits += credits_granted;
+
+		if (data_offset == 0) {
+			/* no payload: the message is the bare 0x14 byte header */
+			if (wc.byte_len != 0x14) {
+				status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+				DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+					__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+				smb_transport_direct_data_disconnect(t, status);
+				return;
+			}
+			goto repost_receive;
+		} else if (data_offset == 0x18) {
+			/*
+			 * NOTE(review): '>=' also rejects a payload that
+			 * fills the receive buffer exactly - confirm
+			 * whether '>' was intended.
+			 */
+			if (data_length >= (t->state.max_receive_size - data_offset)) {
+				status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+				DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+					__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+				smb_transport_direct_data_disconnect(t, status);
+				return;
+			}
+			/*
+			 * The data_length claimed by the peer has to be
+			 * backed by bytes that were really received.
+			 * Otherwise the buffer handed to the reader would
+			 * expose stale contents of the receive buffer
+			 * (the buffers allocated further down come from
+			 * talloc_array() and are not cleared).
+			 */
+			if (wc.byte_len < data_offset ||
+			    data_length > (wc.byte_len - data_offset)) {
+				status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+				DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+					__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+				smb_transport_direct_data_disconnect(t, status);
+				return;
+			}
+		} else {
+			/* only the two data offsets above are accepted */
+			status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+			DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+				__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+			smb_transport_direct_data_disconnect(t, status);
+			return;
+		}
+
+		if (remaining_length > 0) {
+			// TODO: ...
+		}
+
+		if (flags) {
+			// TODO: ...
+		}
+
+		if (t->reqs.read_pdu_req == NULL) {
+			// TODO: is this correct???
+			goto repost_receive;
+		}
+
+		if (t->reqs.read_pdu_req != NULL) {
+			/*
+			 * Turn the receive buffer into the finished PDU:
+			 * 4 bytes in front of the payload (the reader
+			 * fills in a length prefix there) plus data_length
+			 * bytes of payload.
+			 */
+			TALLOC_FREE(t->inbuf.full_buffer);
+			if (t->inbuf.cur_mr != NULL) {
+				ibv_dereg_mr(t->inbuf.cur_mr);
+				t->inbuf.cur_mr = NULL;
+			}
+			ptr = t->inbuf.reassembly_buffer;
+			ptr = talloc_realloc(t, ptr, uint8_t, 4 + data_length);
+			if (ptr == NULL) {
+				status = NT_STATUS_NO_MEMORY;
+				DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+					__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+				smb_transport_direct_data_disconnect(t, status);
+				return;
+			}
+
+			t->inbuf.full_buffer = ptr;
+			t->inbuf.reassembly_buffer= NULL;
+			tevent_req_defer_callback(t->reqs.read_pdu_req,
+						  t->reqs.last_ev);
+			tevent_req_done(t->reqs.read_pdu_req);
+			/* no return here */
+		}
+
+		/* a fresh buffer (and registration) for the next message */
+		ptr = talloc_array(t, uint8_t,
+				   t->state.max_receive_size - 0x14);
+		if (ptr == NULL) {
+			status = NT_STATUS_NO_MEMORY;
+			DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+				__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+			smb_transport_direct_data_disconnect(t, status);
+			return;
+		}
+		t->inbuf.reassembly_buffer = ptr;
+
+		t->inbuf.cur_mr = ibv_reg_mr(t->ibv.pd,
+					     ptr,
+					     talloc_get_size(ptr),
+					     IBV_ACCESS_LOCAL_WRITE);
+		if (t->inbuf.cur_mr == NULL) {
+			status = NT_STATUS_NO_MEMORY;
+			DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+				__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+			smb_transport_direct_data_disconnect(t, status);
+			return;
+		}
+
+		/* the payload lands behind the 4 bytes kept for the prefix */
+		t->inbuf.sge[1].addr = (uint64_t) (uintptr_t) (ptr + 4);
+		t->inbuf.sge[1].length = talloc_get_size(ptr) - 4;
+		t->inbuf.sge[1].lkey = t->inbuf.cur_mr->lkey;
+
+repost_receive:
+		errno = 0;
+		ret = ibv_post_recv(t->ibv.qp, &t->inbuf.wr, &bad_recv_wr);
+		if (ret != 0) {
+			status = map_nt_error_from_unix_common(errno);
+			DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+				__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+			smb_transport_direct_data_disconnect(t, status);
+			return;
+		}
+
+		return;
+
+	case IBV_WC_RDMA_READ:
+	case IBV_WC_RDMA_WRITE:
+	default:
+		status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+		smb_transport_direct_data_disconnect(t, status);
+		return;
+	}
+}
+
+/*
+ * Make sure the fd handlers for the completion channel and for the
+ * connection manager channel are registered on the given event
+ * context.
+ *
+ * The current registration is dropped when one of the handlers
+ * reports no flags (tevent_fd_get_flags() == 0), when neither a read
+ * nor a write request is pending, or when ev is NULL.
+ *
+ * @param t   the transport
+ * @param ev  event context to register on; NULL only drops the
+ *            current registration
+ *
+ * @return NT_STATUS_OK if the handlers are registered on ev (or were
+ *         dropped for ev == NULL),
+ *         NT_STATUS_INVALID_PARAMETER_MIX if the handlers are still
+ *         in use on a different event context,
+ *         NT_STATUS_NO_MEMORY if a handler could not be added.
+ */
+static NTSTATUS smb_transport_direct_data_setup_events(struct smb_transport_direct *t,
+							struct tevent_context *ev)
+{
+	bool reset = false;
+
+	if (tevent_fd_get_flags(t->ibv.fde_channel) == 0) {
+		reset = true;
+	}
+
+	if (tevent_fd_get_flags(t->rdma.fde_channel) == 0) {
+		reset = true;
+	}
+
+	if (t->reqs.read_pdu_req == NULL && t->reqs.write_pdu_req == NULL) {
+		reset = true;
+	}
+
+	if (ev == NULL) {
+		reset = true;
+	}
+
+	if (reset) {
+		t->reqs.last_ev = NULL;
+		TALLOC_FREE(t->ibv.fde_channel);
+		TALLOC_FREE(t->rdma.fde_channel);
+	}
+
+	if (ev == NULL) {
+		/*
+		 * Nothing to register on. Continuing would hand a NULL
+		 * event context to tevent_add_fd().
+		 */
+		return NT_STATUS_OK;
+	}
+
+	if (t->reqs.last_ev != NULL) {
+		if (ev == t->reqs.last_ev) {
+			/* already registered on this event context */
+			return NT_STATUS_OK;
+		}
+		/* still in use on another event context */
+		return NT_STATUS_INVALID_PARAMETER_MIX;
+	}
+
+	t->ibv.fde_channel = tevent_add_fd(ev, t,
+					   t->ibv.comp_channel->fd,
+					   TEVENT_FD_READ,
+					   smb_transport_direct_data_ibv_handler,
+					   t);
+	if (t->ibv.fde_channel == NULL) {
+		return NT_STATUS_NO_MEMORY;
+	}
+	t->rdma.fde_channel = tevent_add_fd(ev, t,
+					    t->rdma.cm_channel->fd,
+					    TEVENT_FD_READ,
+					    smb_transport_direct_data_rdma_handler,
+					    t);
+	if (t->rdma.fde_channel == NULL) {
+		/* never leave just one of the two handlers behind */
+		TALLOC_FREE(t->ibv.fde_channel);
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	t->reqs.last_ev = ev;
+	return NT_STATUS_OK;
+}
+
+struct smb_transport_direct_write_pdu_state { /* per-request state of a write_pdu request */
+ struct smb_transport_direct *t; /* transport the request was issued on; the destructor clears t->reqs.write_pdu_req through it */
+ uint8_t *buffer; /* outgoing message: 0x18 byte header followed by the payload copied from the caller's iovec */
+ struct ibv_mr *mr; /* memory registration of buffer; deregistered by the destructor */
+ struct ibv_sge sge[1]; /* single scatter/gather element describing buffer */
+ struct ibv_send_wr wr; /* send work request handed to ibv_post_send() */
+};
+
+/*
+ * talloc destructor of the write_pdu request state.
+ *
+ * Releases the memory registration of the send buffer (if there is
+ * one) and removes the pending write request from the transport.
+ * Always returns 0, so the state is always freed.
+ */
+static int smb_transport_direct_write_pdu_destructor(
+	struct smb_transport_direct_write_pdu_state *state)
+{
+	if (state->mr != NULL) {
+		ibv_dereg_mr(state->mr);
+		state->mr = NULL;
+	}
+
+	if (state->t != NULL) {
+		state->t->reqs.write_pdu_req = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Start sending one PDU, given as an iovec array, as an SMB-Direct
+ * data transfer message.
+ *
+ * The payload is copied behind a 0x18 byte header into a private
+ * buffer, the buffer is registered with the protection domain and a
+ * signaled send work request is posted. The request is finished by
+ * the completion channel handler, or by
+ * smb_transport_direct_data_disconnect() on failure.
+ *
+ * NT_STATUS_INVALID_PARAMETER_MIX is reported for more than IOV_MAX
+ * vector elements, a total length that overflows, is 0 or exceeds
+ * max_fragmented_size, and when no send credit is available.
+ *
+ * NOTE(review): at most max_send_size - 0x18 payload bytes are sent.
+ * For a longer PDU the rest is only announced in the header (offset
+ * 0x08), nothing in this function sends it.
+ *
+ * @param mem_ctx   talloc parent of the request
+ * @param ev        event context to run on
+ * @param transport the SMB-Direct transport
+ * @param vector    payload to send
+ * @param count     number of elements in vector
+ *
+ * @return the request, or NULL if it could not be allocated
+ */
+static struct tevent_req *smb_transport_direct_write_pdu_send(TALLOC_CTX *mem_ctx,
+						struct tevent_context *ev,
+						struct smb_transport *transport,
+						const struct iovec *vector,
+						size_t count)
+{
+	struct tevent_req *req;
+	struct smb_transport_direct_write_pdu_state *state;
+	struct smb_transport_direct *t =
+		smb_transport_data(transport,
+				   struct smb_transport_direct);
+	size_t to_write = 0;
+	size_t current_len = 0;
+	size_t remaining_len = 0;
+	uint8_t *ptr = NULL;
+	size_t buf_len = 0;
+	size_t i;
+	NTSTATUS status;
+	int ret;
+	struct ibv_send_wr *bad_send_wr = NULL;
+	uint16_t granted = 0;
+	uint16_t flags = 0;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct smb_transport_direct_write_pdu_state);
+	if (req == NULL) {
+		return NULL;
+	}
+	state->t = t;
+	talloc_set_destructor(state, smb_transport_direct_write_pdu_destructor);
+
+	/* first check if the input is ok */
+#ifdef IOV_MAX
+	if (count > IOV_MAX) {
+		tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER_MIX);
+		return tevent_req_post(req, ev);
+	}
+#endif
+
+	/* total payload length, guarding against size_t wrap around */
+	for (i=0; i < count; i++) {
+		size_t tmp = to_write;
+		tmp += vector[i].iov_len;
+
+		if (tmp < to_write) {
+			tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER_MIX);
+			return tevent_req_post(req, ev);
+		}
+
+		to_write = tmp;
+	}
+
+	if (to_write == 0) {
+		tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER_MIX);
+		return tevent_req_post(req, ev);
+	}
+
+	if (to_write > state->t->state.max_fragmented_size) {
+		tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER_MIX);
+		return tevent_req_post(req, ev);
+	}
+
+	if (state->t->state.send_credits == 0) {
+		tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER_MIX);
+		return tevent_req_post(req, ev);
+	}
+
+	/* one message carries the header plus as much payload as fits */
+	current_len = MIN(state->t->state.max_send_size - 0x18, to_write);
+	remaining_len = to_write - current_len;
+
+	buf_len = 0x18 + current_len;
+	state->buffer = talloc_zero_array(state, uint8_t, buf_len);
+	if (tevent_req_nomem(state->buffer, req)) {
+		return tevent_req_post(req, ev);
+	}
+
+	/*
+	 * Grant the peer what is missing up to receive_credit_max, but
+	 * not more than receive_credit_target. Sending costs one of our
+	 * own send credits.
+	 */
+	granted = state->t->state.receive_credit_max;
+	granted -= state->t->state.receive_credits;
+	granted = MIN(granted, state->t->state.receive_credit_target);
+	state->t->state.receive_credits += granted;
+	state->t->state.send_credits -= 1;
+
+	if (state->t->state.send_credits == 0) {
+		/* flag bit 0x0001 is set when the last send credit is used */
+		flags |= 0x0001;
+	}
+
+	/* data transfer header, all values little endian */
+	SSVAL(state->buffer, 0x00, state->t->state.send_credit_target);
+	SSVAL(state->buffer, 0x02, granted);
+	SSVAL(state->buffer, 0x04, flags);
+	SSVAL(state->buffer, 0x06, 0x0000);
+	SIVAL(state->buffer, 0x08, remaining_len);
+	SIVAL(state->buffer, 0x0C, 0x00000018);
+	SIVAL(state->buffer, 0x10, current_len);
+	SIVAL(state->buffer, 0x14, 0x00000000);
+
+	/* gather the first current_len payload bytes behind the header */
+	ptr = &state->buffer[0x18];
+	for (i=0; i < count; i++) {
+		const uint8_t *this_buf = (const uint8_t *)vector[i].iov_base;
+		size_t this_len = MIN(current_len, vector[i].iov_len);
+
+		memcpy(ptr, this_buf, this_len);
+		ptr += this_len;
+		current_len -= this_len;
+		if (current_len == 0) {
+			break;
+		}
+	}
+
+	status = smb_transport_direct_data_setup_events(state->t, ev);
+	if (!NT_STATUS_IS_OK(status)) {
+		smb_transport_direct_data_disconnect(state->t, status);
+		tevent_req_nterror(req, status);
+		return tevent_req_post(req, ev);
+	}
+	state->t->reqs.write_pdu_req = req;
+
+	/*
+	 * Register exactly the buffer that was allocated. It is only
+	 * buf_len bytes long, which can be less than max_send_size, so
+	 * registering max_send_size would reach past the allocation.
+	 */
+	state->mr = ibv_reg_mr(state->t->ibv.pd,
+			       state->buffer,
+			       buf_len,
+			       0);
+	if (state->mr == NULL) {
+		/* the disconnect fails req, it is the pending write */
+		smb_transport_direct_data_disconnect(state->t, NT_STATUS_NO_MEMORY);
+		return req;
+	}
+
+	state->sge[0].addr = (uint64_t) (uintptr_t) state->buffer;
+	state->sge[0].length = buf_len;
+	state->sge[0].lkey = state->mr->lkey;
+	state->wr.opcode = IBV_WR_SEND;
+	state->wr.send_flags = IBV_SEND_SIGNALED;
+	state->wr.sg_list = state->sge;
+	state->wr.num_sge = ARRAY_SIZE(state->sge);
+
+	errno = 0;
+	ret = ibv_post_send(state->t->ibv.qp, &state->wr, &bad_send_wr);
+	if (ret != 0) {
+		status = map_nt_error_from_unix_common(errno);
+		DEBUG(0,("%s:%s: ret[%d] errno[%d] status[%s]\n",
+			__location__, __FUNCTION__, ret, errno, nt_errstr(status)));
+		smb_transport_direct_data_disconnect(state->t, status);
+		return req;
+	}
+
+	return req;
+}
+
+/*
+ * Collect the result of a write_pdu request.
+ *
+ * The transport's pending write request is cleared, then the request
+ * is marked as received.
+ *
+ * @return NT_STATUS_OK, or the error the request finished with
+ */
+static NTSTATUS smb_transport_direct_write_pdu_recv(struct tevent_req *req)
+{
+	struct smb_transport_direct_write_pdu_state *state =
+		tevent_req_data(req,
+				struct smb_transport_direct_write_pdu_state);
+	NTSTATUS result = NT_STATUS_OK;
+
+	state->t->reqs.write_pdu_req = NULL;
+
+	if (!tevent_req_is_nterror(req, &result)) {
+		result = NT_STATUS_OK;
+	}
+
+	tevent_req_received(req);
+	return result;
+}
+
+struct smb_transport_direct_read_pdu_state { /* per-request state of a read_pdu request */
+ struct smb_transport_direct *t; /* transport the request was issued on; the destructor clears t->reqs.read_pdu_req through it */
+ uint8_t *inbuf; /* NOTE(review): not referenced by the read_pdu functions next to this struct - confirm it is still needed */
+};
+
+/*
+ * talloc destructor of the read_pdu request state: removes the
+ * pending read request from the transport, if a transport is set.
+ * Always returns 0, so the state is always freed.
+ */
+static int smb_transport_direct_read_pdu_destructor(
+	struct smb_transport_direct_read_pdu_state *state)
+{
+	if (state->t == NULL) {
+		return 0;
+	}
+
+	state->t->reqs.read_pdu_req = NULL;
+	return 0;
+}
+
+/*
+ * Start waiting for the next incoming PDU.
+ *
+ * The request is stored as the transport's pending read request and
+ * is finished by the completion channel handler, or by
+ * smb_transport_direct_data_disconnect() on failure.
+ *
+ * The fd handlers are registered on ev first, the same way the write
+ * path does it. Without that step a read could only ever complete if
+ * an earlier write had left handlers registered on an event context
+ * that is still alive.
+ *
+ * @param mem_ctx   talloc parent of the request
+ * @param ev        event context to run on
+ * @param transport the SMB-Direct transport
+ *
+ * @return the request, or NULL if it could not be allocated
+ */
+static struct tevent_req *smb_transport_direct_read_pdu_send(TALLOC_CTX *mem_ctx,
+						struct tevent_context *ev,
+						struct smb_transport *transport)
+{
+	struct tevent_req *req;
+	struct smb_transport_direct_read_pdu_state *state;
+	struct smb_transport_direct *t =
+		smb_transport_data(transport,
+				   struct smb_transport_direct);
+	NTSTATUS status;
+
+	req = tevent_req_create(mem_ctx, &state,
+				struct smb_transport_direct_read_pdu_state);
+	if (req == NULL) {
+		return NULL;
+	}
+	state->t = t;
+	talloc_set_destructor(state, smb_transport_direct_read_pdu_destructor);
+
+	status = smb_transport_direct_data_setup_events(state->t, ev);
+	if (!NT_STATUS_IS_OK(status)) {
+		/* same reaction as in the write path */
+		smb_transport_direct_data_disconnect(state->t, status);
+		tevent_req_nterror(req, status);
+		return tevent_req_post(req, ev);
+	}
+
+	state->t->reqs.read_pdu_req = req;
+
+	return req;
+}
+
+/*
+ * Collect the result of a read_pdu request.
+ *
+ * On success the transport's full_buffer is moved to mem_ctx and
+ * returned through vector. Its first 4 bytes are filled in with a
+ * length prefix (via _smb_setlen_tcp()) that covers the rest of the
+ * buffer.
+ *
+ * @param req      the finished request
+ * @param mem_ctx  new talloc parent of the returned buffer
+ * @param vector   receives base and length of the PDU
+ *
+ * @return NT_STATUS_OK, or the error the request finished with
+ */
+static NTSTATUS smb_transport_direct_read_pdu_recv(struct tevent_req *req,
+						   TALLOC_CTX *mem_ctx,
+						   struct iovec *vector)
+{
+	struct smb_transport_direct_read_pdu_state *state =
+		tevent_req_data(req,
+				struct smb_transport_direct_read_pdu_state);
+	struct smb_transport_direct *t = state->t;
+	NTSTATUS status;
+	size_t pdu_len;
+	size_t payload_len;
+
+	t->reqs.read_pdu_req = NULL;
+
+	if (tevent_req_is_nterror(req, &status)) {
+		tevent_req_received(req);
+		return status;
+	}
+
+	pdu_len = talloc_get_size(t->inbuf.full_buffer);
+	payload_len = pdu_len - 4;
+
+	vector->iov_len = pdu_len;
+	_smb_setlen_tcp(t->inbuf.full_buffer, payload_len);
+	vector->iov_base = talloc_move(mem_ctx, &t->inbuf.full_buffer);
+
+	tevent_req_received(req);
+	return NT_STATUS_OK;
+}
+
+/* Operation table that plugs the SMB-Direct code into smb_transport. */
+static const struct smb_transport_ops smb_transport_direct_ops = {
+	.name		= "smbdirect",
+
+	/* outgoing PDUs */
+	.write_pdu_send	= smb_transport_direct_write_pdu_send,
+	.write_pdu_recv	= smb_transport_direct_write_pdu_recv,
+
+	/* incoming PDUs */
+	.read_pdu_send	= smb_transport_direct_read_pdu_send,
+	.read_pdu_recv	= smb_transport_direct_read_pdu_recv,
+};
+
+/*
+ * Create an unconnected SMB-Direct transport.
+ *
+ * Allocates the transport with the SMB-Direct operation table, fills
+ * in the initial size and credit settings and creates the RDMA event
+ * channel (switched to non-blocking and close-on-exec) together with
+ * the connection manager id.
+ *
+ * @param mem_ctx     talloc parent of the transport
+ * @param _transport  receives the new transport on success
+ * @param location    passed through to smb_transport_create()
+ *
+ * @return NT_STATUS_OK on success; every failure is reported as
+ *         NT_STATUS_NO_MEMORY
+ */
+NTSTATUS _smb_transport_direct_create(TALLOC_CTX *mem_ctx,
+				      struct smb_transport **_transport,
+				      const char *location)
+{
+	struct smb_transport *transport = NULL;
+	struct smb_transport_direct *t = NULL;
+	int ret;
+
+	transport = smb_transport_create(mem_ctx,
+					 &smb_transport_direct_ops,
+					 &t,
+					 struct smb_transport_direct,
+					 location);
+	if (transport == NULL) {
+		return NT_STATUS_NO_MEMORY;
+	}
+	talloc_set_destructor(t, smb_transport_direct_destructor);
+
+	/* initial sizes */
+	t->state.max_send_size = 1364;
+	t->state.max_receive_size = 8192;
+	t->state.max_fragmented_size = 1048576;
+	t->state.max_read_write_size = 0;
+
+	/* initial credit settings and keep alive value */
+	t->state.receive_credit_max = 1;
+	t->state.send_credit_target = 255;
+	t->state.keep_alive_internal = 5;
+
+	t->rdma.cm_channel = rdma_create_event_channel();
+	if (t->rdma.cm_channel == NULL) {
+		goto nomem;
+	}
+
+	set_blocking(t->rdma.cm_channel->fd, false);
+	smb_set_close_on_exec(t->rdma.cm_channel->fd);
+
+#if RDMA_USER_CM_MAX_ABI_VERSION >= 2
+	ret = rdma_create_id(t->rdma.cm_channel,
+			     &t->rdma.cm_id,
+			     t, RDMA_PS_TCP);
+#else
+#error
+	ret = rdma_create_id(t->rdma.cm_channel,
+			     &t->rdma.cm_id,
+			     t);
+#endif
+	if (ret != 0) {
+		goto nomem;
+	}
+
+	*_transport = transport;
+	return NT_STATUS_OK;
+
+nomem:
+	/* freeing the transport also cleans up what was set up so far */
+	talloc_free(transport);
+	return NT_STATUS_NO_MEMORY;
+}
+
+#endif /* SMB_TRANSPORT_ENABLE_RDMA */