P7.X: smb2_read state->smb2req->queue_entry.vfs_io
author     Stefan Metzmacher <metze@samba.org>
Thu, 1 Oct 2020 15:46:04 +0000 (17:46 +0200)
committer  Stefan Metzmacher <metze@samba.org>
Mon, 16 Oct 2023 11:03:52 +0000 (13:03 +0200)
source3/smbd/globals.h
source3/smbd/smb2_read.c
source3/smbd/smb2_server.c

index 885f1ece3588206492a43b4443dd9fd93f39bf04..9df37f5b9d061de6d78f9b77627cd7afc8cee238 100644
@@ -714,6 +714,9 @@ struct smbd_smb2_send_queue {
                struct samba_io_uring_submission submission;
        } io_uring;
 
+       struct smb_vfs_io *vfs_io;
+       size_t vfs_io_size;
+
        struct {
                struct tevent_req *req;
                struct timeval timeout;
index bac78e4c77a4f4391ccada0734bc9ceafc61d692..ca2490f4ea5b5a4468d8e6c8d4622b7a3bad2c77 100644
@@ -192,6 +192,9 @@ struct smbd_smb2_read_state {
        uint32_t in_minimum;
        DATA_BLOB out_headers;
        uint8_t _out_hdr_buf[NBT_HDR_SIZE + SMB2_HDR_BODY + 0x10];
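+       /*
+        * State for the vfs_io based read path: vfs_io holds the
+        * read result, vfs_io_length the number of bytes read and
+        * vfs_io_subreq the pending SMB_VFS_IO_READ_SEND request,
+        * so it can still be cancelled.
+        */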
+       struct smb_vfs_io *vfs_io;
+       size_t vfs_io_length;
+       struct tevent_req *vfs_io_subreq;
        DATA_BLOB out_data;
        uint32_t out_remaining;
 };
@@ -383,6 +386,7 @@ static NTSTATUS schedule_smb2_sendfile_read(struct smbd_smb2_request *smb2req,
 }
 
 static void smbd_smb2_read_pipe_done(struct tevent_req *subreq);
+static void smbd_smb2_read_io_done(struct tevent_req *subreq);
 
 /*******************************************************************
  Common read complete processing function for both synchronous and
@@ -444,6 +448,19 @@ static bool smbd_smb2_read_cancel(struct tevent_req *req)
        return cancel_smb2_aio(state->smbreq);
 }
 
+static bool smbd_smb2_read_io_cancel(struct tevent_req *req)
+{
+       struct smbd_smb2_read_state *state =
+               tevent_req_data(req,
+               struct smbd_smb2_read_state);
+
+       if (state->vfs_io_subreq == NULL) {
+               return false;
+       }
+
+       return tevent_req_cancel(state->vfs_io_subreq);
+}
+
 static struct tevent_req *smbd_smb2_read_send(TALLOC_CTX *mem_ctx,
                                              struct tevent_context *ev,
                                              struct smbd_smb2_request *smb2req,
@@ -536,6 +553,86 @@ static struct tevent_req *smbd_smb2_read_send(TALLOC_CTX *mem_ctx,
                return tevent_req_post(req, ev);
        }
 
+       if (smb2req->xconn->smb2.send_uring != NULL &&
+           !smb2req->do_signing && !smb2req->do_encryption &&
+           lp_parm_bool(-1, "smb2srv", "vfs_io_read", false))
+       {
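+               /*
+                * vfs_io based read path: the data is read into a
+                * vfs_io object and later spliced from its output fd
+                * into the socket by the io_uring send path. Signing
+                * and encryption would need the payload in smbd's
+                * memory, so those cases are excluded above.
+                */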
+               size_t min_aio_read_size = lp_aio_read_size(SNUM(conn));
+               struct tevent_req *subreq = NULL;
+               bool ok;
+
+               ok = vfs_valid_pread_range(state->in_offset,
+                                          state->in_length);
+               if (!ok) {
+                       tevent_req_nterror(req, NT_STATUS_INVALID_PARAMETER);
+                       return tevent_req_post(req, ev);
+               }
+
+               if (fsp->base_fsp != NULL) {
+                       /* No AIO on streams yet */
+                       goto fallback_sync;
+               }
+
+               if (fsp->op == NULL) {
+                       /* No AIO on internal opens. */
+                       goto fallback_sync;
+               }
+
+               if (!(state->in_length >= min_aio_read_size) &&
+                   !SMB_VFS_AIO_FORCE(fsp))
+               {
+                       /* Too small a read for aio request. */
+                       DBG_DEBUG("read size (%u) too small "
+                                 "for minimum async vfs_io_read of %u\n",
+                                 (unsigned int)state->in_length,
+                                 (unsigned int)min_aio_read_size);
+                       goto fallback_sync;
+               }
+
+               if (smbd_smb2_is_compound(smbreq->smb2req)) {
+                       goto fallback_sync;
+               }
+
+               /* Create the vfs_io structure. */
+               state->vfs_io = smb_vfs_io_get(state,
+                                              smb2req->sconn->vfs_io_pool);
+               if (state->vfs_io == NULL) {
+                       tevent_req_nterror(req, NT_STATUS_INSUFFICIENT_RESOURCES);
+                       return tevent_req_post(req, ev);
+               }
+
+               init_strict_lock_struct(fsp,
+                                       fsp->op->global->open_persistent_id,
+                                       in_offset,
+                                       in_length,
+                                       READ_LOCK,
+                                       &lock);
+               if (!SMB_VFS_STRICT_LOCK_CHECK(conn, fsp, &lock)) {
+                       tevent_req_nterror(req, NT_STATUS_FILE_LOCK_CONFLICT);
+                       return tevent_req_post(req, ev);
+               }
+
+               subreq = SMB_VFS_IO_READ_SEND(state,
+                                             ev,
+                                             fsp,
+                                             state->vfs_io,
+                                             in_length,
+                                             in_offset);
+               if (tevent_req_nomem(subreq, req)) {
+                       return tevent_req_post(req, ev);
+               }
+               tevent_req_set_callback(subreq,
+                                       smbd_smb2_read_io_done,
+                                       req);
+               /*
+                * Doing an async read, allow this
+                * request to be canceled
+                */
+               state->vfs_io_subreq = subreq;
+               tevent_req_set_cancel_fn(req, smbd_smb2_read_io_cancel);
+               return req;
+       }
+
        status = schedule_smb2_aio_read(fsp->conn,
                                smbreq,
                                fsp,
@@ -559,6 +656,7 @@ static struct tevent_req *smbd_smb2_read_send(TALLOC_CTX *mem_ctx,
                return tevent_req_post(req, ev);
        }
 
+fallback_sync:
        /* Fallback to synchronous. */
 
        init_strict_lock_struct(fsp,
@@ -652,6 +750,54 @@ static void smbd_smb2_read_pipe_done(struct tevent_req *subreq)
        tevent_req_done(req);
 }
 
+static void smbd_smb2_read_io_done(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(subreq,
+                                struct tevent_req);
+       struct smbd_smb2_read_state *state = tevent_req_data(req,
+                                            struct smbd_smb2_read_state);
+       files_struct *fsp = state->fsp;
+       NTSTATUS status;
+       ssize_t nread;
+       struct vfs_aio_state vfs_aio_state = { 0 };
+
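+       /*
+        * The subrequest is done, smbd_smb2_read_io_cancel()
+        * can no longer cancel it.
+        */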
+       state->vfs_io_subreq = NULL;
+
+       nread = SMB_VFS_IO_READ_RECV(subreq, &vfs_aio_state);
+       TALLOC_FREE(subreq);
+
+       DBG_DEBUG("io_read_recv returned %d, err = %s\n", (int)nread,
+                 (nread == -1) ? strerror(vfs_aio_state.error) : "no error");
+
+       /* Common error or success code processing for async or sync
+          read returns. */
+
+       status = smb2_read_complete(req, nread, vfs_aio_state.error);
+
+       if (nread > 0) {
+               uint64_t newpos = state->in_offset + nread;
+
+               state->vfs_io_length = nread;
+
+               fh_set_pos(fsp->fh, newpos);
+               fh_set_position_information(fsp->fh, newpos);
+       }
+
+       DBG_DEBUG("SMB_VFS_IO_READ_RECV completed "
+                 "for file %s, offset %.0f, len = %u "
+                 "(errcode = %d, NTSTATUS = %s)\n",
+                 fsp_str_dbg(fsp),
+                 (double)state->in_offset,
+                 (unsigned int)nread,
+                 vfs_aio_state.error, nt_errstr(status));
+
+       if (!NT_STATUS_IS_OK(status)) {
+               tevent_req_nterror(req, status);
+               return;
+       }
+       tevent_req_done(req);
+}
+
 static NTSTATUS smbd_smb2_read_recv(struct tevent_req *req,
                                    TALLOC_CTX *mem_ctx,
                                    DATA_BLOB *out_data,
@@ -670,12 +816,18 @@ static NTSTATUS smbd_smb2_read_recv(struct tevent_req *req,
        talloc_steal(mem_ctx, out_data->data);
        *out_remaining = state->out_remaining;
 
+       if (state->vfs_io_length > 0) {
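+               /*
+                * Hand the vfs_io and the number of body bytes it
+                * holds over to the send queue entry, so the io_uring
+                * send path can splice the body behind the response
+                * headers.
+                */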
+               state->smb2req->queue_entry.vfs_io = talloc_move(mem_ctx, &state->vfs_io);
+               state->smb2req->queue_entry.vfs_io_size = state->vfs_io_length;
+       }
+
        if (state->out_headers.length > 0) {
                talloc_steal(mem_ctx, state);
                talloc_set_destructor(state, smb2_smb2_read_state_deny_destructor);
                tevent_req_received(req);
                state->smb2req->queue_entry.sendfile_header = &state->out_headers;
-               state->smb2req->queue_entry.sendfile_body_size = state->in_length;
+               state->smb2req->queue_entry.sendfile_body_size = state->out_data.length;
+               state->smb2req->queue_entry.vfs_io = state->vfs_io;
                talloc_set_destructor(state, smb2_sendfile_send_data);
        } else {
                tevent_req_received(req);
index a5229bae49d6db763f837c87a58169bcaa38462a..d30fd8e57e536079e13cdd80981245a92cdbe038 100644
@@ -4997,13 +4997,26 @@ static NTSTATUS smbd_smb2_advance_send_queue(struct smbXsrv_connection *xconn,
 
        xconn->ack.unacked_bytes += n;
 
-       ok = iov_advance(&e->vector, &e->count, n);
-       if (!ok) {
-               return NT_STATUS_INTERNAL_ERROR;
+       DBG_DEBUG("n[%zu] e[%p]->io_uring.pending_snd[%u] e->io_uring.pending_zc[%zu]\n",
+                 n, e, e->io_uring.pending_snd, e->io_uring.pending_zc);
+       debug_iovec(e->vector, e->count);
+
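+       /*
+        * Account the completed bytes against the iovec first;
+        * once it is drained they belong to the spliced vfs_io body.
+        */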
+       if (e->count > 0) {
+               ok = iov_advance(&e->vector, &e->count, n);
+               if (!ok) {
+                       return NT_STATUS_INTERNAL_ERROR;
+               }
+       } else if (e->vfs_io_size > 0) {
+               if (n > e->vfs_io_size) {
+                       return NT_STATUS_INTERNAL_ERROR;
+               }
+               e->vfs_io_size -= n;
        }
 
        if (e->count > 0) {
                return NT_STATUS_RETRY;
+       } else if (e->vfs_io_size > 0) {
+               return NT_STATUS_RETRY;
        }
 
        xconn->smb2.send_queue_len--;
@@ -5160,6 +5173,10 @@ static NTSTATUS smbd_smb2_flush_with_io_uring(struct smbXsrv_connection *xconn)
                        struct io_uring_sqe *sqe = NULL;
                        ssize_t blen;
 
+                       if (e->vfs_io_size > 0) {
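+                               /*
+                                * The spliced body still follows,
+                                * let the kernel coalesce it with
+                                * the headers.
+                                */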
+                               sendmsg_flags |= MSG_MORE;
+                       }
+
                        e->msg = (struct msghdr) {
                                .msg_iov = e->vector,
                                .msg_iovlen = e->count,
@@ -5188,6 +5205,22 @@ static NTSTATUS smbd_smb2_flush_with_io_uring(struct smbXsrv_connection *xconn)
 
                        e->io_uring.pending_snd = true;
                        submission = &e->io_uring.submission;
+               } else if (e->vfs_io_size > 0) {
+                       struct samba_io_uring_qe *splice_qe = NULL;
+                       int io_output_fd = smb_vfs_io_output_fd(e->vfs_io);
+
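+                       /*
+                        * Queue a splice from the vfs_io output fd
+                        * straight into the socket for the remaining
+                        * body bytes.
+                        */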
+                       splice_qe = &e->io_uring.qes[e->io_uring.num_qes++];
+                       io_uring_prep_splice(&splice_qe->sqe,
+                                            io_output_fd, -1,
+                                            xconn->transport.sock, -1,
+                                            e->vfs_io_size,
+                                            SPLICE_F_MOVE);
+                       /*
+                        * Note splice always runs in an async helper thread
+                        */
+                       splice_qe->private_data = e;
+                       splice_qe->submission_fn = smbd_smb2_noop_submission_io_uring;
+                       splice_qe->completion_fn = smbd_smb2_flush_completion_io_uring;
                } else {
                        smb_panic(__location__);
                }