2 * Simulate Posix AIO using pthreads.
4 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
6 * Copyright (C) Volker Lendecke 2008
7 * Copyright (C) Jeremy Allison 2012
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 3 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "pthreadpool.h"
/* File-wide state: the thread pool handle handed to the pthreadpool_* calls
   in this file, and the counter that create_private_data() post-increments
   to hand out job ids. */
31 static struct pthreadpool *pool;
32 static int aio_pthread_jobid;
/* Per-request bookkeeping record; prev/next link it into pd_list and aiocb
   points at the request being serviced.
   NOTE(review): only some members are visible in this listing. Other code in
   this file also reads or writes jobid, ret_size, ret_errno, cancelled and
   write_command through this struct - confirm their declarations. */
34 struct aio_private_data {
35 struct aio_private_data *prev, *next;
37 SMB_STRUCT_AIOCB *aiocb;
44 /* List of outstanding requests we have. Entries are appended by create_private_data() and unlinked by pd_destructor(). */
45 struct aio_private_data *pd_list;
/* Forward declaration: init_aio_threadpool() registers this handler with
   tevent_add_fd() before the definition appears further down the file. */
47 static void aio_pthread_handle_completion(struct event_context *event_ctx,
48 struct fd_event *event,
52 /************************************************************************
53 How many threads to initialize ?
 The result is what init_aio_threadpool() passes to pthreadpool_init() as
 the thread count.
 NOTE(review): the function body is not visible in this listing, so how the
 number is chosen cannot be stated here.
54 ***********************************************************************/
56 static int aio_get_num_threads(void)
62 /************************************************************************
63 Called every 30 seconds to destroy pool if it's idle.
 Timer callback. When a pool exists and pd_list is empty it calls
 pthreadpool_destroy() and, on a zero return, logs at debug level 10.
 The visible tail of the function arms another 30 second timer on the
 server event context with this same function as the handler.
 NOTE(review): the branch structure between those two steps, and any reset
 of the pool pointer after a successful destroy, are not visible in this
 listing - confirm against the full source.
64 ***********************************************************************/
66 static void idle_pool_destroy_timer(struct tevent_context *ev,
67 struct tevent_timer *te,
68 struct timeval current_time,
75 if (pool && pd_list == NULL) {
76 if (pthreadpool_destroy(pool) == 0) {
79 DEBUG(10,("idle_pool_destroy_timer: destroyed AIO pool.\n"));
83 /* Here, the IO is still active. */
85 /* Set an event up for 30 seconds time - if we have
86 no outstanding IO at this time shut the threadpool
88 ne = tevent_timeval_current_ofs(30, 0);
89 tevent_add_timer(server_event_context(),
92 idle_pool_destroy_timer,
97 /************************************************************************
98 Ensure thread pool is initialized.
 Visible steps, in order:
  - pthreadpool_init() is called with the count from aio_get_num_threads().
  - aio_pthread_handle_completion is registered on the fd returned by
    pthreadpool_signal_fd(pool), in the server event context. If that
    registration returns NULL the pool is destroyed again.
  - idle_pool_destroy_timer is scheduled 30 seconds out.
  - The thread count is logged at debug level 10.
 Returns bool; aio_pthread_read() and aio_pthread_write() treat a false
 result as failure.
 NOTE(review): the return statements, the check of the pthreadpool_init()
 result and any early-out for an already existing pool are not visible in
 this listing.
99 ***********************************************************************/
101 static bool init_aio_threadpool(void)
103 struct fd_event *sock_event = NULL;
105 int num_threads = aio_get_num_threads();
114 ret = pthreadpool_init(num_threads, &pool);
119 sock_event = tevent_add_fd(server_event_context(),
121 pthreadpool_signal_fd(pool),
123 aio_pthread_handle_completion,
125 if (sock_event == NULL) {
126 pthreadpool_destroy(pool);
132 /* Set an event up for 30 seconds time - if we have
133 no outstanding IO at this time shut the threadpool
135 ne = tevent_timeval_current_ofs(30, 0);
136 tevent_add_timer(server_event_context(),
139 idle_pool_destroy_timer,
143 DEBUG(10,("init_aio_threadpool: initialized with %d threads\n",
150 /************************************************************************
151 Worker function - core of the pthread aio engine.
152 This is the function that actually does the IO.
 Queued as the job function by pthreadpool_add_job() in aio_pthread_read()
 and aio_pthread_write(); private_data is the struct aio_private_data of
 the request.
 When pd->write_command is set it issues pwrite(); the other visible call
 is pread(). Both take the fd, buffer, byte count and offset from
 pd->aiocb and store the result in pd->ret_size. A result of -1 causes
 errno to be saved in pd->ret_errno.
 NOTE(review): what ret_errno is set to on success is not visible in this
 listing.
153 ***********************************************************************/
155 static void aio_worker(void *private_data)
157 struct aio_private_data *pd =
158 (struct aio_private_data *)private_data;
160 if (pd->write_command) {
161 pd->ret_size = pwrite(pd->aiocb->aio_fildes,
162 (const void *)pd->aiocb->aio_buf,
163 pd->aiocb->aio_nbytes,
164 pd->aiocb->aio_offset);
166 pd->ret_size = pread(pd->aiocb->aio_fildes,
167 (void *)pd->aiocb->aio_buf,
168 pd->aiocb->aio_nbytes,
169 pd->aiocb->aio_offset);
171 if (pd->ret_size == -1) {
172 pd->ret_errno = errno;
178 /************************************************************************
179 Private data destructor.
 Installed by create_private_data() through talloc_set_destructor().
 Unlinks pd from pd_list so the list never holds a record that is being
 released.
 NOTE(review): the return value is not visible in this listing.
180 ***********************************************************************/
182 static int pd_destructor(struct aio_private_data *pd)
184 DLIST_REMOVE(pd_list, pd);
188 /************************************************************************
189 Create and initialize a private data struct.
 Allocates a zero-filled struct aio_private_data as a talloc child of ctx,
 assigns it the next job id (post-incrementing aio_pthread_jobid), marks it
 as in progress by setting ret_errno to EINPROGRESS, installs
 pd_destructor and appends it to the end of pd_list.
 NOTE(review): the allocation failure check, the store of the aiocb
 argument into the struct and the return statement are not visible in this
 listing.
190 ***********************************************************************/
192 static struct aio_private_data *create_private_data(TALLOC_CTX *ctx,
193 SMB_STRUCT_AIOCB *aiocb)
195 struct aio_private_data *pd = talloc_zero(ctx, struct aio_private_data);
199 pd->jobid = aio_pthread_jobid++;
202 pd->ret_errno = EINPROGRESS;
203 talloc_set_destructor(pd, pd_destructor);
204 DLIST_ADD_END(pd_list, pd, struct aio_private_data *);
208 /************************************************************************
209 Spin off a threadpool (if needed) and initiate a pread call.
 Visible steps, in order:
  - init_aio_threadpool() is called and its false result is tested.
  - A private data record is created with the aio_extra found in
    aiocb->aio_sigevent.sigev_value.sival_ptr as talloc parent, so the
    record is tied to that aio_extra.
  - aio_worker is queued on the pool under the job id of the record.
  - Job id, byte count and offset are logged at debug level 10.
 NOTE(review): the return values and the handling of a failed
 pthreadpool_add_job() are not visible in this listing.
210 ***********************************************************************/
212 static int aio_pthread_read(struct vfs_handle_struct *handle,
213 struct files_struct *fsp,
214 SMB_STRUCT_AIOCB *aiocb)
216 struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
217 struct aio_private_data *pd = NULL;
220 if (!init_aio_threadpool()) {
224 pd = create_private_data(aio_ex, aiocb);
226 DEBUG(10, ("aio_pthread_read: Could not create private data.\n"));
230 ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
236 DEBUG(10, ("aio_pthread_read: jobid=%d pread requested "
237 "of %llu bytes at offset %llu\n",
239 (unsigned long long)pd->aiocb->aio_nbytes,
240 (unsigned long long)pd->aiocb->aio_offset));
245 /************************************************************************
246 Spin off a threadpool (if needed) and initiate a pwrite call.
 Same visible sequence as aio_pthread_read(), with one addition:
 pd->write_command is set to true before the job is queued, which is the
 flag aio_worker() tests to pick pwrite() over pread().
 NOTE(review): the return values and the handling of a failed
 pthreadpool_add_job() are not visible in this listing.
247 ***********************************************************************/
249 static int aio_pthread_write(struct vfs_handle_struct *handle,
250 struct files_struct *fsp,
251 SMB_STRUCT_AIOCB *aiocb)
253 struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
254 struct aio_private_data *pd = NULL;
257 if (!init_aio_threadpool()) {
261 pd = create_private_data(aio_ex, aiocb);
263 DEBUG(10, ("aio_pthread_write: Could not create private data.\n"));
267 pd->write_command = true;
269 ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
275 DEBUG(10, ("aio_pthread_write: jobid=%d pwrite requested "
276 "of %llu bytes at offset %llu\n",
278 (unsigned long long)pd->aiocb->aio_nbytes,
279 (unsigned long long)pd->aiocb->aio_offset));
284 /************************************************************************
285 Find the private data by jobid.
 Walks pd_list from the head and compares each jobid field with the
 argument.
 NOTE(review): the return statements are not visible in this listing;
 presumably the matching record is returned, or NULL when none matches -
 confirm.
286 ***********************************************************************/
288 static struct aio_private_data *find_private_data_by_jobid(int jobid)
290 struct aio_private_data *pd;
292 for (pd = pd_list; pd != NULL; pd = pd->next) {
293 if (pd->jobid == jobid) {
301 /************************************************************************
302 Callback when an IO completes.
 fd event handler that init_aio_threadpool() registers on the signal fd of
 the pool. Visible steps, in order:
  - The flags are logged at debug level 10 and the EVENT_FD_READ bit is
    tested.
  - The id of a finished job is fetched with pthreadpool_finished_job();
    a call to smb_panic() follows on a path whose condition is not visible.
  - The record is looked up by job id; a level 1 message is logged on the
    path that reports it cannot be found.
  - The aio_extra stored in the sigevent value of the aiocb is handed to
    smbd_aio_complete_aio_ex(), and completion is logged at level 10.
 NOTE(review): the branch conditions and early returns are not visible in
 this listing.
303 ***********************************************************************/
305 static void aio_pthread_handle_completion(struct event_context *event_ctx,
306 struct fd_event *event,
310 struct aio_extra *aio_ex = NULL;
311 struct aio_private_data *pd = NULL;
315 DEBUG(10, ("aio_pthread_handle_completion called with flags=%d\n",
318 if ((flags & EVENT_FD_READ) == 0) {
322 ret = pthreadpool_finished_job(pool, &jobid);
324 smb_panic("aio_pthread_handle_completion");
328 pd = find_private_data_by_jobid(jobid);
330 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
335 aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
336 smbd_aio_complete_aio_ex(aio_ex);
338 DEBUG(10,("aio_pthread_handle_completion: jobid %d completed\n",
343 /************************************************************************
344 Find the private data by aiocb.
 Walks pd_list from the head and compares each stored aiocb pointer with
 the argument (pointer identity, not contents).
 NOTE(review): the return statements are not visible in this listing;
 presumably the matching record is returned, or NULL when none matches -
 confirm.
345 ***********************************************************************/
347 static struct aio_private_data *find_private_data_by_aiocb(SMB_STRUCT_AIOCB *aiocb)
349 struct aio_private_data *pd;
351 for (pd = pd_list; pd != NULL; pd = pd->next) {
352 if (pd->aiocb == aiocb) {
360 /************************************************************************
361 Called to return the result of a completed AIO.
362 Should only be called if aio_error returns something other than EINPROGRESS.
364 Any other value - return from IO operation.
 Looks the record up with find_private_data_by_aiocb(). One visible path
 logs a level 0 message about returning EINVAL; another copies
 pd->ret_errno into errno when pd->ret_size is -1.
 NOTE(review): the conditions guarding those paths, the handling of a
 cancelled request and the return statements are not visible in this
 listing.
365 ***********************************************************************/
367 static ssize_t aio_pthread_return_fn(struct vfs_handle_struct *handle,
368 struct files_struct *fsp,
369 SMB_STRUCT_AIOCB *aiocb)
371 struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
375 DEBUG(0, ("aio_pthread_return_fn: returning EINVAL\n"));
381 if (pd->ret_size == -1) {
382 errno = pd->ret_errno;
388 /************************************************************************
389 Called to check the result of an AIO.
391 EINPROGRESS - still in progress.
392 EINVAL - invalid aiocb.
393 ECANCELED - request was cancelled.
394 0 - request completed successfully.
395 Any other value - errno from IO operation.
 Looks the record up with find_private_data_by_aiocb() and, on the one
 return that is visible, hands back pd->ret_errno. That field starts as
 EINPROGRESS in create_private_data().
 NOTE(review): the paths producing EINVAL and ECANCELED are not visible in
 this listing.
396 ***********************************************************************/
398 static int aio_pthread_error_fn(struct vfs_handle_struct *handle,
399 struct files_struct *fsp,
400 SMB_STRUCT_AIOCB *aiocb)
402 struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
410 return pd->ret_errno;
413 /************************************************************************
414 Called to request the cancel of an AIO, or all of them on a specific
415 fsp if aiocb == NULL.
 Walks pd_list. Three tests are visible per record: the record has no
 aiocb, the fd of its aiocb differs from fsp->fh->fd, and a specific aiocb
 was requested but this record holds a different one. The cancelled flag
 is then set to true; the job itself is not stopped here.
 NOTE(review): what each test does when it is true (presumably it skips to
 the next record) and the return value are not visible in this listing.
416 ***********************************************************************/
418 static int aio_pthread_cancel(struct vfs_handle_struct *handle,
419 struct files_struct *fsp,
420 SMB_STRUCT_AIOCB *aiocb)
422 struct aio_private_data *pd = NULL;
424 for (pd = pd_list; pd != NULL; pd = pd->next) {
425 if (pd->aiocb == NULL) {
428 if (pd->aiocb->aio_fildes != fsp->fh->fd) {
431 if ((aiocb != NULL) && (pd->aiocb != aiocb)) {
436 * We let the child do its job, but we discard the result when
440 pd->cancelled = true;
446 /************************************************************************
447 Callback for a previously detected job completion.
 Immediate-event handler scheduled by
 aio_pthread_handle_suspend_completion(). private_data is read as a
 pointer to an int holding the job id. The record is looked up by that id;
 a level 1 message is logged on the path that reports it cannot be found.
 Otherwise the aio_extra stored in the sigevent value of the aiocb is
 handed to smbd_aio_complete_aio_ex().
 NOTE(review): the branch conditions, and whether the job id allocation is
 released here, are not visible in this listing.
448 ***********************************************************************/
450 static void aio_pthread_handle_immediate(struct tevent_context *ctx,
451 struct tevent_immediate *im,
454 struct aio_extra *aio_ex = NULL;
455 int *pjobid = (int *)private_data;
456 struct aio_private_data *pd = find_private_data_by_jobid(*pjobid);
459 DEBUG(1, ("aio_pthread_handle_immediate cannot find jobid %d\n",
466 aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
467 smbd_aio_complete_aio_ex(aio_ex);
470 /************************************************************************
471 Private data struct used in suspend completion code.
 aiocb_array is the set of requests aio_pthread_suspend() is waiting on.
 NOTE(review): only this member is visible in this listing. Other code in
 this file also uses num_entries and num_finished on this struct - confirm
 their declarations.
472 ***********************************************************************/
474 struct suspend_private {
477 const SMB_STRUCT_AIOCB * const *aiocb_array;
480 /************************************************************************
481 Callback when an IO completes from a suspend call.
 fd event handler that aio_pthread_suspend() registers on its private
 event context; p is the struct suspend_private of that call.
 Visible steps, in order:
  - The flags are logged at debug level 10 and the EVENT_FD_READ bit is
    tested.
  - A one-element int array is allocated on the NULL talloc context to
    hold the job id, so it can outlive this call; smb_panic() is called on
    the no-memory path.
  - pthreadpool_finished_job() fills in the job id; a non-zero result
    leads to smb_panic().
  - The record is looked up and compared against every entry of
    sp->aiocb_array.
  - For a job the suspend call is not waiting for, an immediate event
    running aio_pthread_handle_immediate is scheduled on the server event
    context.
 NOTE(review): the bookkeeping done when a waited-for job is found, and
 the release of the job id allocation, are not visible in this listing.
482 ***********************************************************************/
484 static void aio_pthread_handle_suspend_completion(struct event_context *event_ctx,
485 struct fd_event *event,
489 struct suspend_private *sp = (struct suspend_private *)p;
490 struct aio_private_data *pd = NULL;
491 struct tevent_immediate *im = NULL;
495 DEBUG(10, ("aio_pthread_handle_suspend_completion called with flags=%d\n",
498 if ((flags & EVENT_FD_READ) == 0) {
502 pjobid = talloc_array(NULL, int, 1);
504 smb_panic("aio_pthread_handle_suspend_completion: no memory.");
507 if (pthreadpool_finished_job(pool, pjobid)) {
508 smb_panic("aio_pthread_handle_suspend_completion: can't find job.");
512 pd = find_private_data_by_jobid(*pjobid);
/* NOTE(review): the message below names aio_pthread_handle_completion, not
   this function; it looks copied from that handler and can mislead when
   reading logs. */
514 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
520 /* Is this a jobid with an aiocb we're interested in ? */
521 for (i = 0; i < sp->num_entries; i++) {
522 if (sp->aiocb_array[i] == pd->aiocb) {
529 /* Jobid completed we weren't waiting for.
530 We must reschedule this as an immediate event
531 on the main event context. */
532 im = tevent_create_immediate(NULL);
534 exit_server_cleanly("aio_pthread_handle_suspend_completion: no memory");
537 DEBUG(10,("aio_pthread_handle_suspend_completion: "
538 "re-scheduling job id %d\n",
541 tevent_schedule_immediate(im,
542 server_event_context(),
543 aio_pthread_handle_immediate,
/* Timer callback installed by aio_pthread_suspend(). private_data is read as
   a pointer to bool.
   NOTE(review): the statements that remove the timer and store into the
   flag are not visible in this listing. */
548 static void aio_pthread_suspend_timed_out(struct tevent_context *event_ctx,
549 struct tevent_timer *te,
553 bool *timed_out = (bool *)private_data;
554 /* Remove this timed event handler. */
559 /************************************************************************
560 Called to request everything to stop until all IO is completed.
 Blocking wait built on a private event context allocated on a talloc
 stackframe. Visible steps, in order:
  - The timeout is converted to a timeval and a timer running
    aio_pthread_suspend_timed_out is added to the private context.
  - sp.aiocb_array is pointed at the array supplied by the caller.
  - aio_pthread_handle_suspend_completion is registered on the signal fd
    of the pool, in the private context.
  - tevent_loop_once() is run until sp.num_finished equals
    sp.num_entries; a -1 result from the loop is tested.
 NOTE(review): the failure path of the fd registration calls
 pthreadpool_destroy() on the shared pool; confirm that the pool pointer is
 reset there, since the other entry points keep using it.
 NOTE(review): the handling of the timed_out flag, the release of the
 stackframe and the return values are not visible in this listing.
561 ***********************************************************************/
563 static int aio_pthread_suspend(struct vfs_handle_struct *handle,
564 struct files_struct *fsp,
565 const SMB_STRUCT_AIOCB * const aiocb_array[],
567 const struct timespec *timeout)
569 struct event_context *ev = NULL;
570 struct fd_event *sock_event = NULL;
572 struct suspend_private sp;
573 bool timed_out = false;
574 TALLOC_CTX *frame = talloc_stackframe();
576 /* This is a blocking call, and has to use a sub-event loop. */
577 ev = event_context_init(frame);
584 struct timeval tv = convert_timespec_to_timeval(*timeout);
585 struct tevent_timer *te = tevent_add_timer(ev,
587 timeval_current_ofs(tv.tv_sec,
589 aio_pthread_suspend_timed_out,
599 sp.aiocb_array = aiocb_array;
602 sock_event = tevent_add_fd(ev,
604 pthreadpool_signal_fd(pool),
606 aio_pthread_handle_suspend_completion,
608 if (sock_event == NULL) {
609 pthreadpool_destroy(pool);
614 * We're going to cheat here. We know that smbd/aio.c
615 * only calls this when it's waiting for every single
616 * outstanding call to finish on a close, so just wait
617 * individually for each IO to complete. We don't care
618 * what order they finish - only that they all do. JRA.
620 while (sp.num_entries != sp.num_finished) {
621 if (tevent_loop_once(ev) == -1) {
/* VFS operation table that maps the six AIO hooks onto the functions defined
   in this file. vfs_aio_pthread_init() passes its address to
   smb_register_vfs(). */
639 static struct vfs_fn_pointers vfs_aio_pthread_fns = {
640 .aio_read_fn = aio_pthread_read,
641 .aio_write_fn = aio_pthread_write,
642 .aio_return_fn = aio_pthread_return_fn,
643 .aio_cancel_fn = aio_pthread_cancel,
644 .aio_error_fn = aio_pthread_error_fn,
645 .aio_suspend_fn = aio_pthread_suspend,
/* Module entry point: registers vfs_aio_pthread_fns under the module name
   aio_pthread for SMB_VFS_INTERFACE_VERSION and returns the NTSTATUS that
   smb_register_vfs() produces. The prototype directly precedes the
   definition. */
648 NTSTATUS vfs_aio_pthread_init(void);
649 NTSTATUS vfs_aio_pthread_init(void)
651 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
652 "aio_pthread", &vfs_aio_pthread_fns);