Ensure we always free aio_ex on all error paths by moving the TALLOC_FREE
[samba.git] / source3 / modules / vfs_aio_pthread.c
1 /*
2  * Simulate Posix AIO using pthreads.
3  *
4  * Based on the aio_fork work from Volker and Volker's pthreadpool library.
5  *
6  * Copyright (C) Volker Lendecke 2008
7  * Copyright (C) Jeremy Allison 2012
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 3 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "system/shmem.h"
27 #include "smbd/smbd.h"
28 #include "lib/pthreadpool/pthreadpool.h"
29
/* Forward declaration: this file only handles pointers to aio_extra. */
struct aio_extra;

/* Thread pool shared by every request; NULL until init_aio_threadpool()
   has created it. */
static struct pthreadpool *pool;

/* Next job id to hand out; bumped once per private data struct created. */
static int aio_pthread_jobid;
33
34 struct aio_private_data {
35         struct aio_private_data *prev, *next;
36         int jobid;
37         SMB_STRUCT_AIOCB *aiocb;
38         ssize_t ret_size;
39         int ret_errno;
40         bool cancelled;
41         bool write_command;
42 };
43
44 /* List of outstanding requests we have. */
45 static struct aio_private_data *pd_list;
46
47 static void aio_pthread_handle_completion(struct event_context *event_ctx,
48                                 struct fd_event *event,
49                                 uint16 flags,
50                                 void *p);
51
52 /************************************************************************
53  How many threads to initialize ?
54  100 per process seems insane as a default until you realize that
55  (a) Threads terminate after 1 second when idle.
56  (b) Throttling is done in SMB2 via the crediting algorithm.
57  (c) SMB1 clients are limited to max_mux (50) outstanding requests and
58      Windows clients don't use this anyway.
59  Essentially we want this to be unlimited unless smb.conf says different.
60 ***********************************************************************/
61
62 static int aio_get_num_threads(struct vfs_handle_struct *handle)
63 {
64         return lp_parm_bool(SNUM(handle->conn),
65                                 "aio_pthread",
66                                 "aio num threads",
67                                 100);
68 }
69
70 /************************************************************************
71  Ensure thread pool is initialized.
72 ***********************************************************************/
73
74 static bool init_aio_threadpool(struct vfs_handle_struct *handle)
75 {
76         struct fd_event *sock_event = NULL;
77         int ret = 0;
78         int num_threads;
79
80         if (pool) {
81                 return true;
82         }
83
84         num_threads = aio_get_num_threads(handle);
85         ret = pthreadpool_init(num_threads, &pool);
86         if (ret) {
87                 errno = ret;
88                 return false;
89         }
90         sock_event = tevent_add_fd(server_event_context(),
91                                 NULL,
92                                 pthreadpool_signal_fd(pool),
93                                 TEVENT_FD_READ,
94                                 aio_pthread_handle_completion,
95                                 NULL);
96         if (sock_event == NULL) {
97                 pthreadpool_destroy(pool);
98                 pool = NULL;
99                 return false;
100         }
101
102         DEBUG(10,("init_aio_threadpool: initialized with up to %d threads\n",
103                         num_threads));
104
105         return true;
106 }
107
108
109 /************************************************************************
110  Worker function - core of the pthread aio engine.
111  This is the function that actually does the IO.
112 ***********************************************************************/
113
114 static void aio_worker(void *private_data)
115 {
116         struct aio_private_data *pd =
117                         (struct aio_private_data *)private_data;
118
119         if (pd->write_command) {
120                 pd->ret_size = pwrite(pd->aiocb->aio_fildes,
121                                 (const void *)pd->aiocb->aio_buf,
122                                 pd->aiocb->aio_nbytes,
123                                 pd->aiocb->aio_offset);
124         } else {
125                 pd->ret_size = pread(pd->aiocb->aio_fildes,
126                                 (void *)pd->aiocb->aio_buf,
127                                 pd->aiocb->aio_nbytes,
128                                 pd->aiocb->aio_offset);
129         }
130         if (pd->ret_size == -1) {
131                 pd->ret_errno = errno;
132         } else {
133                 pd->ret_errno = 0;
134         }
135 }
136
137 /************************************************************************
138  Private data destructor.
139 ***********************************************************************/
140
141 static int pd_destructor(struct aio_private_data *pd)
142 {
143         DLIST_REMOVE(pd_list, pd);
144         return 0;
145 }
146
147 /************************************************************************
148  Create and initialize a private data struct.
149 ***********************************************************************/
150
151 static struct aio_private_data *create_private_data(TALLOC_CTX *ctx,
152                                         SMB_STRUCT_AIOCB *aiocb)
153 {
154         struct aio_private_data *pd = talloc_zero(ctx, struct aio_private_data);
155         if (!pd) {
156                 return NULL;
157         }
158         pd->jobid = aio_pthread_jobid++;
159         pd->aiocb = aiocb;
160         pd->ret_size = -1;
161         pd->ret_errno = EINPROGRESS;
162         talloc_set_destructor(pd, pd_destructor);
163         DLIST_ADD_END(pd_list, pd, struct aio_private_data *);
164         return pd;
165 }
166
167 /************************************************************************
168  Spin off a threadpool (if needed) and initiate a pread call.
169 ***********************************************************************/
170
171 static int aio_pthread_read(struct vfs_handle_struct *handle,
172                                 struct files_struct *fsp,
173                                 SMB_STRUCT_AIOCB *aiocb)
174 {
175         struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
176         struct aio_private_data *pd = NULL;
177         int ret;
178
179         if (!init_aio_threadpool(handle)) {
180                 return -1;
181         }
182
183         pd = create_private_data(aio_ex, aiocb);
184         if (pd == NULL) {
185                 DEBUG(10, ("aio_pthread_read: Could not create private data.\n"));
186                 return -1;
187         }
188
189         ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
190         if (ret) {
191                 errno = ret;
192                 return -1;
193         }
194
195         DEBUG(10, ("aio_pthread_read: jobid=%d pread requested "
196                 "of %llu bytes at offset %llu\n",
197                 pd->jobid,
198                 (unsigned long long)pd->aiocb->aio_nbytes,
199                 (unsigned long long)pd->aiocb->aio_offset));
200
201         return 0;
202 }
203
204 /************************************************************************
205  Spin off a threadpool (if needed) and initiate a pwrite call.
206 ***********************************************************************/
207
208 static int aio_pthread_write(struct vfs_handle_struct *handle,
209                                 struct files_struct *fsp,
210                                 SMB_STRUCT_AIOCB *aiocb)
211 {
212         struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
213         struct aio_private_data *pd = NULL;
214         int ret;
215
216         if (!init_aio_threadpool(handle)) {
217                 return -1;
218         }
219
220         pd = create_private_data(aio_ex, aiocb);
221         if (pd == NULL) {
222                 DEBUG(10, ("aio_pthread_write: Could not create private data.\n"));
223                 return -1;
224         }
225
226         pd->write_command = true;
227
228         ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
229         if (ret) {
230                 errno = ret;
231                 return -1;
232         }
233
234         DEBUG(10, ("aio_pthread_write: jobid=%d pwrite requested "
235                 "of %llu bytes at offset %llu\n",
236                 pd->jobid,
237                 (unsigned long long)pd->aiocb->aio_nbytes,
238                 (unsigned long long)pd->aiocb->aio_offset));
239
240         return 0;
241 }
242
243 /************************************************************************
244  Find the private data by jobid.
245 ***********************************************************************/
246
247 static struct aio_private_data *find_private_data_by_jobid(int jobid)
248 {
249         struct aio_private_data *pd;
250
251         for (pd = pd_list; pd != NULL; pd = pd->next) {
252                 if (pd->jobid == jobid) {
253                         return pd;
254                 }
255         }
256
257         return NULL;
258 }
259
260 /************************************************************************
261  Callback when an IO completes.
262 ***********************************************************************/
263
264 static void aio_pthread_handle_completion(struct event_context *event_ctx,
265                                 struct fd_event *event,
266                                 uint16 flags,
267                                 void *p)
268 {
269         struct aio_extra *aio_ex = NULL;
270         struct aio_private_data *pd = NULL;
271         int jobid = 0;
272         int ret;
273
274         DEBUG(10, ("aio_pthread_handle_completion called with flags=%d\n",
275                         (int)flags));
276
277         if ((flags & EVENT_FD_READ) == 0) {
278                 return;
279         }
280
281         ret = pthreadpool_finished_job(pool, &jobid);
282         if (ret) {
283                 smb_panic("aio_pthread_handle_completion");
284                 return;
285         }
286
287         pd = find_private_data_by_jobid(jobid);
288         if (pd == NULL) {
289                 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
290                           jobid));
291                 return;
292         }
293
294         aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
295         smbd_aio_complete_aio_ex(aio_ex);
296
297         DEBUG(10,("aio_pthread_handle_completion: jobid %d completed\n",
298                 jobid ));
299         TALLOC_FREE(aio_ex);
300 }
301
302 /************************************************************************
303  Find the private data by aiocb.
304 ***********************************************************************/
305
306 static struct aio_private_data *find_private_data_by_aiocb(SMB_STRUCT_AIOCB *aiocb)
307 {
308         struct aio_private_data *pd;
309
310         for (pd = pd_list; pd != NULL; pd = pd->next) {
311                 if (pd->aiocb == aiocb) {
312                         return pd;
313                 }
314         }
315
316         return NULL;
317 }
318
319 /************************************************************************
320  Called to return the result of a completed AIO.
321  Should only be called if aio_error returns something other than EINPROGRESS.
322  Returns:
323         Any other value - return from IO operation.
324 ***********************************************************************/
325
326 static ssize_t aio_pthread_return_fn(struct vfs_handle_struct *handle,
327                                 struct files_struct *fsp,
328                                 SMB_STRUCT_AIOCB *aiocb)
329 {
330         struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
331
332         if (pd == NULL) {
333                 errno = EINVAL;
334                 DEBUG(0, ("aio_pthread_return_fn: returning EINVAL\n"));
335                 return -1;
336         }
337
338         pd->aiocb = NULL;
339
340         if (pd->ret_size == -1) {
341                 errno = pd->ret_errno;
342         }
343
344         return pd->ret_size;
345 }
346
347 /************************************************************************
348  Called to check the result of an AIO.
349  Returns:
350         EINPROGRESS - still in progress.
351         EINVAL - invalid aiocb.
352         ECANCELED - request was cancelled.
353         0 - request completed successfully.
354         Any other value - errno from IO operation.
355 ***********************************************************************/
356
357 static int aio_pthread_error_fn(struct vfs_handle_struct *handle,
358                              struct files_struct *fsp,
359                              SMB_STRUCT_AIOCB *aiocb)
360 {
361         struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
362
363         if (pd == NULL) {
364                 return EINVAL;
365         }
366         if (pd->cancelled) {
367                 return ECANCELED;
368         }
369         return pd->ret_errno;
370 }
371
372 /************************************************************************
373  Called to request the cancel of an AIO, or all of them on a specific
374  fsp if aiocb == NULL.
375 ***********************************************************************/
376
377 static int aio_pthread_cancel(struct vfs_handle_struct *handle,
378                         struct files_struct *fsp,
379                         SMB_STRUCT_AIOCB *aiocb)
380 {
381         struct aio_private_data *pd = NULL;
382
383         for (pd = pd_list; pd != NULL; pd = pd->next) {
384                 if (pd->aiocb == NULL) {
385                         continue;
386                 }
387                 if (pd->aiocb->aio_fildes != fsp->fh->fd) {
388                         continue;
389                 }
390                 if ((aiocb != NULL) && (pd->aiocb != aiocb)) {
391                         continue;
392                 }
393
394                 /*
395                  * We let the child do its job, but we discard the result when
396                  * it's finished.
397                  */
398
399                 pd->cancelled = true;
400         }
401
402         return AIO_CANCELED;
403 }
404
405 /************************************************************************
406  Callback for a previously detected job completion.
407 ***********************************************************************/
408
409 static void aio_pthread_handle_immediate(struct tevent_context *ctx,
410                                 struct tevent_immediate *im,
411                                 void *private_data)
412 {
413         struct aio_extra *aio_ex = NULL;
414         int *pjobid = (int *)private_data;
415         struct aio_private_data *pd = find_private_data_by_jobid(*pjobid);
416
417         if (pd == NULL) {
418                 DEBUG(1, ("aio_pthread_handle_immediate cannot find jobid %d\n",
419                           *pjobid));
420                 TALLOC_FREE(pjobid);
421                 return;
422         }
423
424         TALLOC_FREE(pjobid);
425         aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
426         smbd_aio_complete_aio_ex(aio_ex);
427         TALLOC_FREE(aio_ex);
428 }
429
430 /************************************************************************
431  Private data struct used in suspend completion code.
432 ***********************************************************************/
433
434 struct suspend_private {
435         int num_entries;
436         int num_finished;
437         const SMB_STRUCT_AIOCB * const *aiocb_array;
438 };
439
440 /************************************************************************
441  Callback when an IO completes from a suspend call.
442 ***********************************************************************/
443
444 static void aio_pthread_handle_suspend_completion(struct event_context *event_ctx,
445                                 struct fd_event *event,
446                                 uint16 flags,
447                                 void *p)
448 {
449         struct suspend_private *sp = (struct suspend_private *)p;
450         struct aio_private_data *pd = NULL;
451         struct tevent_immediate *im = NULL;
452         int *pjobid = NULL;
453         int i;
454
455         DEBUG(10, ("aio_pthread_handle_suspend_completion called with flags=%d\n",
456                         (int)flags));
457
458         if ((flags & EVENT_FD_READ) == 0) {
459                 return;
460         }
461
462         pjobid = talloc_array(NULL, int, 1);
463         if (pjobid) {
464                 smb_panic("aio_pthread_handle_suspend_completion: no memory.");
465         }
466
467         if (pthreadpool_finished_job(pool, pjobid)) {
468                 smb_panic("aio_pthread_handle_suspend_completion: can't find job.");
469                 return;
470         }
471
472         pd = find_private_data_by_jobid(*pjobid);
473         if (pd == NULL) {
474                 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
475                           *pjobid));
476                 TALLOC_FREE(pjobid);
477                 return;
478         }
479
480         /* Is this a jobid with an aiocb we're interested in ? */
481         for (i = 0; i < sp->num_entries; i++) {
482                 if (sp->aiocb_array[i] == pd->aiocb) {
483                         sp->num_finished++;
484                         TALLOC_FREE(pjobid);
485                         return;
486                 }
487         }
488
489         /* Jobid completed we weren't waiting for.
490            We must reshedule this as an immediate event
491            on the main event context. */
492         im = tevent_create_immediate(NULL);
493         if (!im) {
494                 exit_server_cleanly("aio_pthread_handle_suspend_completion: no memory");
495         }
496
497         DEBUG(10,("aio_pthread_handle_suspend_completion: "
498                         "re-scheduling job id %d\n",
499                         *pjobid));
500
501         tevent_schedule_immediate(im,
502                         server_event_context(),
503                         aio_pthread_handle_immediate,
504                         (void *)pjobid);
505 }
506
507
/************************************************************************
 Timer handler used by aio_pthread_suspend(): private_data points at
 the caller's bool, which is set to true. The timer event itself is
 freed so it cannot fire again.
***********************************************************************/

static void aio_pthread_suspend_timed_out(struct tevent_context *event_ctx,
                                        struct tevent_timer *te,
                                        struct timeval now,
                                        void *private_data)
{
        bool *expired_flag = (bool *)private_data;

        /* Drop the timer before telling the waiter about the timeout. */
        TALLOC_FREE(te);

        *expired_flag = true;
}
518
519 /************************************************************************
520  Called to request everything to stop until all IO is completed.
521 ***********************************************************************/
522
523 static int aio_pthread_suspend(struct vfs_handle_struct *handle,
524                         struct files_struct *fsp,
525                         const SMB_STRUCT_AIOCB * const aiocb_array[],
526                         int n,
527                         const struct timespec *timeout)
528 {
529         struct event_context *ev = NULL;
530         struct fd_event *sock_event = NULL;
531         int ret = -1;
532         struct suspend_private sp;
533         bool timed_out = false;
534         TALLOC_CTX *frame = talloc_stackframe();
535
536         /* This is a blocking call, and has to use a sub-event loop. */
537         ev = event_context_init(frame);
538         if (ev == NULL) {
539                 errno = ENOMEM;
540                 goto out;
541         }
542
543         if (timeout) {
544                 struct timeval tv = convert_timespec_to_timeval(*timeout);
545                 struct tevent_timer *te = tevent_add_timer(ev,
546                                                 frame,
547                                                 timeval_current_ofs(tv.tv_sec,
548                                                                     tv.tv_usec),
549                                                 aio_pthread_suspend_timed_out,
550                                                 &timed_out);
551                 if (!te) {
552                         errno = ENOMEM;
553                         goto out;
554                 }
555         }
556
557         ZERO_STRUCT(sp);
558         sp.num_entries = n;
559         sp.aiocb_array = aiocb_array;
560         sp.num_finished = 0;
561
562         sock_event = tevent_add_fd(ev,
563                                 frame,
564                                 pthreadpool_signal_fd(pool),
565                                 TEVENT_FD_READ,
566                                 aio_pthread_handle_suspend_completion,
567                                 (void *)&sp);
568         if (sock_event == NULL) {
569                 pthreadpool_destroy(pool);
570                 pool = NULL;
571                 goto out;
572         }
573         /*
574          * We're going to cheat here. We know that smbd/aio.c
575          * only calls this when it's waiting for every single
576          * outstanding call to finish on a close, so just wait
577          * individually for each IO to complete. We don't care
578          * what order they finish - only that they all do. JRA.
579          */
580         while (sp.num_entries != sp.num_finished) {
581                 if (tevent_loop_once(ev) == -1) {
582                         goto out;
583                 }
584
585                 if (timed_out) {
586                         errno = EAGAIN;
587                         goto out;
588                 }
589         }
590
591         ret = 0;
592
593   out:
594
595         TALLOC_FREE(frame);
596         return ret;
597 }
598
599 static struct vfs_fn_pointers vfs_aio_pthread_fns = {
600         .aio_read_fn = aio_pthread_read,
601         .aio_write_fn = aio_pthread_write,
602         .aio_return_fn = aio_pthread_return_fn,
603         .aio_cancel_fn = aio_pthread_cancel,
604         .aio_error_fn = aio_pthread_error_fn,
605         .aio_suspend_fn = aio_pthread_suspend,
606 };
607
608 NTSTATUS vfs_aio_pthread_init(void);
609 NTSTATUS vfs_aio_pthread_init(void)
610 {
611         return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
612                                 "aio_pthread", &vfs_aio_pthread_fns);
613 }