2 * Simulate Posix AIO using pthreads.
4 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
6 * Copyright (C) Volker Lendecke 2008
7 * Copyright (C) Jeremy Allison 2012
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 3 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "lib/pthreadpool/pthreadpool.h"
31 static struct pthreadpool *pool;
32 static int aio_pthread_jobid;
34 struct aio_private_data {
35 struct aio_private_data *prev, *next;
37 SMB_STRUCT_AIOCB *aiocb;
44 /* List of outstanding requests we have. */
45 static struct aio_private_data *pd_list;
47 static void aio_pthread_handle_completion(struct event_context *event_ctx,
48 struct fd_event *event,
52 /************************************************************************
53 How many threads to initialize?
54 100 per process seems insane as a default until you realize that
55 (a) Threads terminate after 1 second when idle.
56 (b) Throttling is done in SMB2 via the crediting algorithm.
57 (c) SMB1 clients are limited to max_mux (50) outstanding requests and
58 Windows clients don't use this anyway.
59 Essentially we want this to be unlimited unless smb.conf says different.
60 ***********************************************************************/
62 static int aio_get_num_threads(struct vfs_handle_struct *handle)
64 return lp_parm_int(SNUM(handle->conn),
65 "aio_pthread", "aio num threads", 100);
68 /************************************************************************
69 Ensure thread pool is initialized.
70 ***********************************************************************/
72 static bool init_aio_threadpool(struct vfs_handle_struct *handle)
74 struct fd_event *sock_event = NULL;
82 num_threads = aio_get_num_threads(handle);
83 ret = pthreadpool_init(num_threads, &pool);
88 sock_event = tevent_add_fd(server_event_context(),
90 pthreadpool_signal_fd(pool),
92 aio_pthread_handle_completion,
94 if (sock_event == NULL) {
95 pthreadpool_destroy(pool);
100 DEBUG(10,("init_aio_threadpool: initialized with up to %d threads\n",
107 /************************************************************************
108 Worker function - core of the pthread aio engine.
109 This is the function that actually does the IO.
110 ***********************************************************************/
112 static void aio_worker(void *private_data)
114 struct aio_private_data *pd =
115 (struct aio_private_data *)private_data;
117 if (pd->write_command) {
118 pd->ret_size = pwrite(pd->aiocb->aio_fildes,
119 (const void *)pd->aiocb->aio_buf,
120 pd->aiocb->aio_nbytes,
121 pd->aiocb->aio_offset);
123 pd->ret_size = pread(pd->aiocb->aio_fildes,
124 (void *)pd->aiocb->aio_buf,
125 pd->aiocb->aio_nbytes,
126 pd->aiocb->aio_offset);
128 if (pd->ret_size == -1) {
129 pd->ret_errno = errno;
135 /************************************************************************
136 Private data destructor.
137 ***********************************************************************/
139 static int pd_destructor(struct aio_private_data *pd)
141 DLIST_REMOVE(pd_list, pd);
145 /************************************************************************
146 Create and initialize a private data struct.
147 ***********************************************************************/
149 static struct aio_private_data *create_private_data(TALLOC_CTX *ctx,
150 SMB_STRUCT_AIOCB *aiocb)
152 struct aio_private_data *pd = talloc_zero(ctx, struct aio_private_data);
156 pd->jobid = aio_pthread_jobid++;
159 pd->ret_errno = EINPROGRESS;
160 talloc_set_destructor(pd, pd_destructor);
161 DLIST_ADD_END(pd_list, pd, struct aio_private_data *);
165 /************************************************************************
166 Spin off a threadpool (if needed) and initiate a pread call.
167 ***********************************************************************/
169 static int aio_pthread_read(struct vfs_handle_struct *handle,
170 struct files_struct *fsp,
171 SMB_STRUCT_AIOCB *aiocb)
173 struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
174 struct aio_private_data *pd = NULL;
177 if (!init_aio_threadpool(handle)) {
181 pd = create_private_data(aio_ex, aiocb);
183 DEBUG(10, ("aio_pthread_read: Could not create private data.\n"));
187 ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
193 DEBUG(10, ("aio_pthread_read: jobid=%d pread requested "
194 "of %llu bytes at offset %llu\n",
196 (unsigned long long)pd->aiocb->aio_nbytes,
197 (unsigned long long)pd->aiocb->aio_offset));
202 /************************************************************************
203 Spin off a threadpool (if needed) and initiate a pwrite call.
204 ***********************************************************************/
206 static int aio_pthread_write(struct vfs_handle_struct *handle,
207 struct files_struct *fsp,
208 SMB_STRUCT_AIOCB *aiocb)
210 struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
211 struct aio_private_data *pd = NULL;
214 if (!init_aio_threadpool(handle)) {
218 pd = create_private_data(aio_ex, aiocb);
220 DEBUG(10, ("aio_pthread_write: Could not create private data.\n"));
224 pd->write_command = true;
226 ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
232 DEBUG(10, ("aio_pthread_write: jobid=%d pwrite requested "
233 "of %llu bytes at offset %llu\n",
235 (unsigned long long)pd->aiocb->aio_nbytes,
236 (unsigned long long)pd->aiocb->aio_offset));
241 /************************************************************************
242 Find the private data by jobid.
243 ***********************************************************************/
245 static struct aio_private_data *find_private_data_by_jobid(int jobid)
247 struct aio_private_data *pd;
249 for (pd = pd_list; pd != NULL; pd = pd->next) {
250 if (pd->jobid == jobid) {
258 /************************************************************************
259 Callback when an IO completes.
260 ***********************************************************************/
262 static void aio_pthread_handle_completion(struct event_context *event_ctx,
263 struct fd_event *event,
267 struct aio_extra *aio_ex = NULL;
268 struct aio_private_data *pd = NULL;
272 DEBUG(10, ("aio_pthread_handle_completion called with flags=%d\n",
275 if ((flags & EVENT_FD_READ) == 0) {
279 ret = pthreadpool_finished_job(pool, &jobid);
281 smb_panic("aio_pthread_handle_completion");
285 pd = find_private_data_by_jobid(jobid);
287 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
292 aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
293 smbd_aio_complete_aio_ex(aio_ex);
295 DEBUG(10,("aio_pthread_handle_completion: jobid %d completed\n",
300 /************************************************************************
301 Find the private data by aiocb.
302 ***********************************************************************/
304 static struct aio_private_data *find_private_data_by_aiocb(SMB_STRUCT_AIOCB *aiocb)
306 struct aio_private_data *pd;
308 for (pd = pd_list; pd != NULL; pd = pd->next) {
309 if (pd->aiocb == aiocb) {
317 /************************************************************************
318 Called to return the result of a completed AIO.
319 Should only be called if aio_error returns something other than EINPROGRESS.
321 Any other value - return from IO operation.
322 ***********************************************************************/
324 static ssize_t aio_pthread_return_fn(struct vfs_handle_struct *handle,
325 struct files_struct *fsp,
326 SMB_STRUCT_AIOCB *aiocb)
328 struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
332 DEBUG(0, ("aio_pthread_return_fn: returning EINVAL\n"));
338 if (pd->ret_size == -1) {
339 errno = pd->ret_errno;
345 /************************************************************************
346 Called to check the result of an AIO.
348 EINPROGRESS - still in progress.
349 EINVAL - invalid aiocb.
350 ECANCELED - request was cancelled.
351 0 - request completed successfully.
352 Any other value - errno from IO operation.
353 ***********************************************************************/
355 static int aio_pthread_error_fn(struct vfs_handle_struct *handle,
356 struct files_struct *fsp,
357 SMB_STRUCT_AIOCB *aiocb)
359 struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
367 return pd->ret_errno;
370 /************************************************************************
371 Called to request the cancel of an AIO, or all of them on a specific
372 fsp if aiocb == NULL.
373 ***********************************************************************/
375 static int aio_pthread_cancel(struct vfs_handle_struct *handle,
376 struct files_struct *fsp,
377 SMB_STRUCT_AIOCB *aiocb)
379 struct aio_private_data *pd = NULL;
381 for (pd = pd_list; pd != NULL; pd = pd->next) {
382 if (pd->aiocb == NULL) {
385 if (pd->aiocb->aio_fildes != fsp->fh->fd) {
388 if ((aiocb != NULL) && (pd->aiocb != aiocb)) {
393 * We let the worker thread do its job, but we discard the result once it has finished.
397 pd->cancelled = true;
403 /************************************************************************
404 Callback for a previously detected job completion.
405 ***********************************************************************/
407 static void aio_pthread_handle_immediate(struct tevent_context *ctx,
408 struct tevent_immediate *im,
411 struct aio_extra *aio_ex = NULL;
412 int *pjobid = (int *)private_data;
413 struct aio_private_data *pd = find_private_data_by_jobid(*pjobid);
416 DEBUG(1, ("aio_pthread_handle_immediate cannot find jobid %d\n",
423 aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
424 smbd_aio_complete_aio_ex(aio_ex);
428 /************************************************************************
429 Private data struct used in suspend completion code.
430 ***********************************************************************/
432 struct suspend_private {
435 const SMB_STRUCT_AIOCB * const *aiocb_array;
438 /************************************************************************
439 Callback when an IO completes from a suspend call.
440 ***********************************************************************/
442 static void aio_pthread_handle_suspend_completion(struct event_context *event_ctx,
443 struct fd_event *event,
447 struct suspend_private *sp = (struct suspend_private *)p;
448 struct aio_private_data *pd = NULL;
449 struct tevent_immediate *im = NULL;
453 DEBUG(10, ("aio_pthread_handle_suspend_completion called with flags=%d\n",
456 if ((flags & EVENT_FD_READ) == 0) {
460 pjobid = talloc_array(NULL, int, 1);
462 smb_panic("aio_pthread_handle_suspend_completion: no memory.");
465 if (pthreadpool_finished_job(pool, pjobid)) {
466 smb_panic("aio_pthread_handle_suspend_completion: can't find job.");
470 pd = find_private_data_by_jobid(*pjobid);
472 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
478 /* Is this a jobid with an aiocb we're interested in? */
479 for (i = 0; i < sp->num_entries; i++) {
480 if (sp->aiocb_array[i] == pd->aiocb) {
487 /* Jobid completed we weren't waiting for.
488 We must reschedule this as an immediate event
489 on the main event context. */
490 im = tevent_create_immediate(NULL);
492 exit_server_cleanly("aio_pthread_handle_suspend_completion: no memory");
495 DEBUG(10,("aio_pthread_handle_suspend_completion: "
496 "re-scheduling job id %d\n",
499 tevent_schedule_immediate(im,
500 server_event_context(),
501 aio_pthread_handle_immediate,
506 static void aio_pthread_suspend_timed_out(struct tevent_context *event_ctx,
507 struct tevent_timer *te,
511 bool *timed_out = (bool *)private_data;
512 /* Remove this timed event handler. */
517 /************************************************************************
518 Called to request everything to stop until all IO is completed.
519 ***********************************************************************/
521 static int aio_pthread_suspend(struct vfs_handle_struct *handle,
522 struct files_struct *fsp,
523 const SMB_STRUCT_AIOCB * const aiocb_array[],
525 const struct timespec *timeout)
527 struct event_context *ev = NULL;
528 struct fd_event *sock_event = NULL;
530 struct suspend_private sp;
531 bool timed_out = false;
532 TALLOC_CTX *frame = talloc_stackframe();
534 /* This is a blocking call, and has to use a sub-event loop. */
535 ev = event_context_init(frame);
542 struct timeval tv = convert_timespec_to_timeval(*timeout);
543 struct tevent_timer *te = tevent_add_timer(ev,
545 timeval_current_ofs(tv.tv_sec,
547 aio_pthread_suspend_timed_out,
557 sp.aiocb_array = aiocb_array;
560 sock_event = tevent_add_fd(ev,
562 pthreadpool_signal_fd(pool),
564 aio_pthread_handle_suspend_completion,
566 if (sock_event == NULL) {
567 pthreadpool_destroy(pool);
572 * We're going to cheat here. We know that smbd/aio.c
573 * only calls this when it's waiting for every single
574 * outstanding call to finish on a close, so just wait
575 * individually for each IO to complete. We don't care
576 * what order they finish - only that they all do. JRA.
578 while (sp.num_entries != sp.num_finished) {
579 if (tevent_loop_once(ev) == -1) {
597 static struct vfs_fn_pointers vfs_aio_pthread_fns = {
598 .aio_read_fn = aio_pthread_read,
599 .aio_write_fn = aio_pthread_write,
600 .aio_return_fn = aio_pthread_return_fn,
601 .aio_cancel_fn = aio_pthread_cancel,
602 .aio_error_fn = aio_pthread_error_fn,
603 .aio_suspend_fn = aio_pthread_suspend,
606 NTSTATUS vfs_aio_pthread_init(void);
607 NTSTATUS vfs_aio_pthread_init(void)
609 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
610 "aio_pthread", &vfs_aio_pthread_fns);