2 Unix SMB/CIFS implementation.
4 main select loop and event handling - aio/epoll hybrid implementation
6 Copyright (C) Andrew Tridgell 2006
8 based on events_standard.c
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25 this is a very strange beast. The Linux AIO implementation doesn't
26 yet integrate properly with epoll, but there is a kernel patch that
27 allows the aio wait primitives to be used to wait for epoll events,
28 and this can be used to give us a unified event system incorporating
29 both aio events and epoll events
31 this is _very_ experimental code
35 #include "system/filesys.h"
36 #include "lib/util/dlinklist.h"
37 #include "lib/events/events.h"
38 #include "lib/events/events_internal.h"
39 #include <sys/epoll.h>
/* Capacity constant of this backend: it is passed to io_queue_init(), used
   as the epoll_create() size argument, and sizes the epevent[] result
   buffer that the epoll-wait iocb fills in. */
42 #define MAX_AIO_QUEUE_DEPTH 100
/* Fallback value for the epoll-wait iocb opcode, defined only when the
   included headers do not already provide IOCB_CMD_EPOLL_WAIT. */
43 #ifndef IOCB_CMD_EPOLL_WAIT
44 #define IOCB_CMD_EPOLL_WAIT 9
/* Private state of the aio/epoll backend, attached to an event_context via
   ev->additional_data.
   NOTE(review): the embedded original line numbers skip inside this struct,
   so some members that the functions in this file access (num_fd_events,
   ioctx, epoll_fd, is_epoll_set) are not visible in this listing. */
47 struct aio_event_context {
48 /* a pointer back to the generic event_context */
49 struct event_context *ev;
51 /* number of registered fd event handlers */
/* incremented by aio_event_fd_destructor(); aio_event_loop() compares it
   with a snapshot taken on entry to notice that an fd_event was destroyed
   while events were being dispatched */
54 uint32_t destruction_count;
/* result buffer handed to the epoll-wait request (epoll_iocb->u.c.buf) */
58 struct epoll_event epevent[MAX_AIO_QUEUE_DEPTH];
/* the single iocb that setup_epoll_wait() fills in and submits */
60 struct iocb *epoll_iocb;
/* Members of the per-request disk IO record (struct aio_event).
   NOTE(review): the struct header and the other members used elsewhere in
   this file (iocb, private_data) are not visible in this listing. */
/* event context the request belongs to; aio_destructor() reaches the
   backend state through it */
67 struct event_context *event_ctx;
/* completion callback, invoked from aio_event_loop() with the io_event
   result and private_data */
70 event_aio_handler_t handler;
74 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
76 static uint32_t epoll_map_flags(uint16_t flags)
79 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
80 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
/* talloc destructor for the backend context (installed by
   aio_event_context_init): releases the aio queue and closes the epoll fd.
   NOTE(review): the return statement is not visible in this listing. */
87 static int aio_ctx_destructor(struct aio_event_context *aio_ev)
89 io_queue_release(aio_ev->ioctx);
90 close(aio_ev->epoll_fd);
/* mark the backend as torn down: the epoll_*_event helpers and
   aio_event_loop() return early when they see epoll_fd == -1 */
91 aio_ev->epoll_fd = -1;
95 #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
96 #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
97 #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
100 add the epoll event to the given fd_event
102 static void epoll_add_event(struct aio_event_context *aio_ev, struct fd_event *fde)
104 struct epoll_event event;
105 if (aio_ev->epoll_fd == -1) return;
107 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
109 /* if we don't want events yet, don't add an aio_event */
110 if (fde->flags == 0) return;
113 event.events = epoll_map_flags(fde->flags);
114 event.data.ptr = fde;
115 epoll_ctl(aio_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
116 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
118 /* only if we want to read we want to tell the event handler about errors */
119 if (fde->flags & EVENT_FD_READ) {
120 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
125 delete the epoll event for given fd_event
/* Remove fde's registration from the epoll set, if it has one.
   Does nothing at all once the backend is torn down (epoll_fd == -1).
   Otherwise REPORT_ERROR is always cleared, and HAS_EVENT is cleared after
   the EPOLL_CTL_DEL call. */
127 static void epoll_del_event(struct aio_event_context *aio_ev, struct fd_event *fde)
129 struct epoll_event event;
130 if (aio_ev->epoll_fd == -1) return;
132 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
134 /* if there's no aio_event, we don't need to delete it */
135 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
138 event.events = epoll_map_flags(fde->flags);
139 event.data.ptr = fde;
/* the epoll_ctl() result is not checked; HAS_EVENT is cleared regardless */
140 epoll_ctl(aio_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
142 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
146 change the epoll event to the given fd_event
/* Update an existing epoll registration so that its event mask matches the
   current fde->flags, and recompute the REPORT_ERROR bit.
   Does nothing once the backend is torn down (epoll_fd == -1). */
148 static void epoll_mod_event(struct aio_event_context *aio_ev, struct fd_event *fde)
150 struct epoll_event event;
151 if (aio_ev->epoll_fd == -1) return;
/* cleared here, set again below only when read interest is present */
153 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
156 event.events = epoll_map_flags(fde->flags);
157 event.data.ptr = fde;
/* the epoll_ctl() result is not checked */
158 epoll_ctl(aio_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
160 /* only if we want to read we want to tell the event handler about errors */
161 if (fde->flags & EVENT_FD_READ) {
162 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
/* Bring the epoll registration of fde in line with fde->flags by calling
   epoll_mod_event(), epoll_del_event() or epoll_add_event().
   The fde counts as wanting events when it wants to read, or when it wants
   to write and no error has been recorded for it (GOT_ERROR not set).
   NOTE(review): the embedded line numbers skip between the calls below, so
   the statements that separate the branches are not visible here;
   presumably mod is used when events are still wanted and del otherwise -
   confirm against the full source. */
166 static void epoll_change_event(struct aio_event_context *aio_ev, struct fd_event *fde)
168 BOOL got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
169 BOOL want_read = (fde->flags & EVENT_FD_READ);
170 BOOL want_write= (fde->flags & EVENT_FD_WRITE);
172 if (aio_ev->epoll_fd == -1) return;
174 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
176 /* there's already an event */
177 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
178 if (want_read || (want_write && !got_error)) {
179 epoll_mod_event(aio_ev, fde);
182 epoll_del_event(aio_ev, fde);
186 /* there's no aio_event attached to the fde */
187 if (want_read || (want_write && !got_error)) {
188 epoll_add_event(aio_ev, fde);
193 static int setup_epoll_wait(struct aio_event_context *aio_ev)
195 if (aio_ev->is_epoll_set) {
198 memset(aio_ev->epoll_iocb, 0, sizeof(*aio_ev->epoll_iocb));
199 aio_ev->epoll_iocb->aio_fildes = aio_ev->epoll_fd;
200 aio_ev->epoll_iocb->aio_lio_opcode = IOCB_CMD_EPOLL_WAIT;
201 aio_ev->epoll_iocb->aio_reqprio = 0;
203 aio_ev->epoll_iocb->u.c.nbytes = MAX_AIO_QUEUE_DEPTH;
204 aio_ev->epoll_iocb->u.c.offset = -1;
205 aio_ev->epoll_iocb->u.c.buf = aio_ev->epevent;
207 aio_ev->is_epoll_set = 1;
208 if (io_submit(aio_ev->ioctx, 1, &aio_ev->epoll_iocb) != 1) {
217 event loop handling using aio/epoll hybrid
/* Run one iteration of the backend: make sure an epoll wait is queued,
   wait in io_getevents() for between 1 and 8 completions, then dispatch
   each completion according to the opcode of the finished iocb.
   tvalp: timeout for the wait; when it is NULL io_getevents() is called
   with a NULL timeout.
   Returns -1 immediately when the backend is torn down (epoll_fd == -1).
   NOTE(review): the embedded original line numbers skip repeatedly inside
   this function, so braces, short declarations and most return statements
   are not visible here; the remaining return values cannot be confirmed
   from this listing. */
219 static int aio_event_loop(struct aio_event_context *aio_ev, struct timeval *tvalp)
/* snapshot, compared after dispatching to detect destroyed fd_events */
222 uint32_t destruction_count = aio_ev->destruction_count;
223 struct timespec timeout;
224 struct io_event events[8];
226 if (aio_ev->epoll_fd == -1) return -1;
/* convert the struct timeval (microseconds) into the struct timespec
   (nanoseconds) that io_getevents() takes.
   NOTE(review): tvalp is dereferenced here while the io_getevents() call
   below allows it to be NULL; presumably a guard sits on a line that is not
   visible - confirm. */
229 timeout.tv_sec = tvalp->tv_sec;
230 timeout.tv_nsec = tvalp->tv_usec;
231 timeout.tv_nsec *= 1000;
234 if (setup_epoll_wait(aio_ev) < 0)
237 ret = io_getevents(aio_ev->ioctx, 1, 8,
238 events, tvalp?&timeout:NULL);
/* nothing completed and a timeout was supplied: run the timer handling */
243 if (ret == 0 && tvalp) {
244 common_event_loop_timer(aio_ev->ev);
248 for (i=0;i<ret;i++) {
249 struct io_event *event = &events[i];
250 struct iocb *finished = event->obj;
252 switch (finished->aio_lio_opcode) {
/* completion of a disk IO request: finished->data is expected to carry the
   struct aio_event (the case label(s) are not visible in this listing) */
255 struct aio_event *ae = talloc_get_type(finished->data,
/* drop aio_destructor first, so that freeing ae after completion no longer
   issues io_cancel() for it */
258 talloc_set_destructor(ae, NULL);
259 ae->handler(ae->event_ctx, ae,
260 event->res, ae->private_data);
/* completion of the queued epoll wait: event->res is used as the number of
   epoll_event entries written to the buffer */
265 case IOCB_CMD_EPOLL_WAIT: {
266 struct epoll_event *ep = (struct epoll_event *)finished->u.c.buf;
267 struct fd_event *fde;
/* the wait is consumed; setup_epoll_wait() will queue a new one */
271 aio_ev->is_epoll_set = 0;
273 for (j=0; j<event->res; j++, ep++) {
274 fde = talloc_get_type(ep->data.ptr,
/* error or hangup: remember it on the fde; an fde that is not flagged
   REPORT_ERROR has its epoll registration removed, otherwise the condition
   is reported as readable */
279 if (ep->events & (EPOLLHUP|EPOLLERR)) {
280 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
281 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
282 epoll_del_event(aio_ev, fde);
285 flags |= EVENT_FD_READ;
/* NOTE(review): the declaration of flags is not visible here; confirm it is
   reset for every epoll_event, otherwise bits would carry over from one fd
   to the next */
287 if (ep->events & EPOLLIN) flags |= EVENT_FD_READ;
288 if (ep->events & EPOLLOUT) flags |= EVENT_FD_WRITE;
290 fde->handler(aio_ev->ev, fde, flags, fde->private_data);
/* an fd_event was destroyed during dispatch (aio_event_fd_destructor bumps
   the counter); the action taken is on lines not visible here */
296 if (destruction_count != aio_ev->destruction_count) {
305 create an aio_event_context structure.
307 static int aio_event_context_init(struct event_context *ev)
309 struct aio_event_context *aio_ev;
311 aio_ev = talloc_zero(ev, struct aio_event_context);
312 if (!aio_ev) return -1;
315 aio_ev->epoll_iocb = talloc(aio_ev, struct iocb);
317 if (io_queue_init(MAX_AIO_QUEUE_DEPTH, &aio_ev->ioctx) != 0) {
321 aio_ev->epoll_fd = epoll_create(MAX_AIO_QUEUE_DEPTH);
322 if (aio_ev->epoll_fd == -1) return -1;
324 talloc_set_destructor(aio_ev, aio_ctx_destructor);
326 ev->additional_data = aio_ev;
/* talloc destructor for an fd_event (installed by aio_event_add_fd):
   updates the backend counters and removes the fde from the epoll set.
   NOTE(review): the return statement is not visible in this listing. */
333 static int aio_event_fd_destructor(struct fd_event *fde)
335 struct event_context *ev = fde->event_ctx;
336 struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
337 struct aio_event_context);
/* aio_event_loop_wait() keeps looping while this count is non-zero */
339 aio_ev->num_fd_events--;
/* lets aio_event_loop() notice that an fde went away during dispatch */
340 aio_ev->destruction_count++;
342 epoll_del_event(aio_ev, fde);
349 return NULL on failure (memory allocation error)
/* Create an fd_event and register it with the epoll set.
   The fd_event is allocated as a talloc child of mem_ctx, or of ev when
   mem_ctx is NULL; NULL is returned when that allocation fails.
   NOTE(review): the embedded line numbers skip here, so the last parameter
   (private_data is used below), the assignments of the remaining fde fields
   and the final return are not visible in this listing. */
351 static struct fd_event *aio_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
352 int fd, uint16_t flags,
353 event_fd_handler_t handler,
356 struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
357 struct aio_event_context);
358 struct fd_event *fde;
360 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
361 if (!fde) return NULL;
366 fde->handler = handler;
367 fde->private_data = private_data;
/* backend-private bits (EPOLL_ADDITIONAL_FD_FLAG_*) start out cleared */
368 fde->additional_flags = 0;
369 fde->additional_data = NULL;
/* counted so that aio_event_loop_wait() knows there is work left; the
   destructor undoes the count and the epoll registration */
371 aio_ev->num_fd_events++;
372 talloc_set_destructor(fde, aio_event_fd_destructor);
374 epoll_add_event(aio_ev, fde);
381 return the fd event flags
/* Accessor installed as the .get_fd_flags operation of this backend.
   NOTE(review): the function body is not visible in this listing;
   presumably it returns fde->flags - confirm against the full source. */
383 static uint16_t aio_event_get_fd_flags(struct fd_event *fde)
389 set the fd event flags
/* Change the event flags of fde and update its epoll registration through
   epoll_change_event().  Returns early when the flags are unchanged.
   NOTE(review): the statements that set ev and store the new flags in the
   fde are on lines not visible in this listing. */
391 static void aio_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
393 struct event_context *ev;
394 struct aio_event_context *aio_ev;
396 if (fde->flags == flags) return;
399 aio_ev = talloc_get_type(ev->additional_data, struct aio_event_context);
403 epoll_change_event(aio_ev, fde);
407 do a single event loop using the events defined in ev
/* Run a single pass of the event loop for ev.
   The delay comes from common_event_loop_delay(); a zero delay runs
   common_event_loop_timer() directly, otherwise the result of
   aio_event_loop() with that delay as timeout is returned.
   NOTE(review): the declaration of tval and the statement that follows the
   timer call are not visible in this listing. */
409 static int aio_event_loop_once(struct event_context *ev)
411 struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
412 struct aio_event_context);
415 tval = common_event_loop_delay(ev);
417 if (timeval_is_zero(&tval)) {
418 common_event_loop_timer(ev);
422 return aio_event_loop(aio_ev, &tval);
426 return on failure or (with 0) if all fd events are removed
/* Keep calling aio_event_loop_once() for as long as fd events are
   registered (num_fd_events != 0).
   NOTE(review): what happens when aio_event_loop_once() returns non-zero,
   and the final return value, are on lines not visible in this listing. */
428 static int aio_event_loop_wait(struct event_context *ev)
430 struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
431 struct aio_event_context);
432 while (aio_ev->num_fd_events) {
433 if (aio_event_loop_once(ev) != 0) {
442 called when a disk IO event needs to be cancelled
/* talloc destructor for a pending aio_event (installed by
   aio_event_add_aio): asks the kernel to cancel the request.
   aio_event_loop() removes this destructor before running the completion
   handler, so it only runs for requests that have not been dispatched.
   NOTE(review): the io_cancel() result is ignored (see the TODO below);
   presumably a request that could not be cancelled can still complete after
   ae has been freed - confirm and handle.  The return statement is not
   visible in this listing. */
444 static int aio_destructor(struct aio_event *ae)
446 struct event_context *ev = ae->event_ctx;
447 struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
448 struct aio_event_context);
449 struct io_event result;
450 io_cancel(aio_ev->ioctx, &ae->iocb, &result);
451 /* TODO: handle errors from io_cancel()! */
455 /* submit an aio disk IO event */
/* Submit a disk IO request and return the aio_event that tracks it.
   The aio_event is allocated as a talloc child of mem_ctx, or of ev when
   mem_ctx is NULL; NULL is returned when that allocation fails.
   NOTE(review): the embedded line numbers skip here, so several parameters
   (mem_ctx and private_data are used below), the setup of the iocb and of
   iocbp, the io_submit() failure path and the final return are not visible
   in this listing. */
456 static struct aio_event *aio_event_add_aio(struct event_context *ev,
459 event_aio_handler_t handler,
462 struct aio_event_context *aio_ev = talloc_get_type(ev->additional_data,
463 struct aio_event_context);
465 struct aio_event *ae = talloc(mem_ctx?mem_ctx:ev, struct aio_event);
466 if (ae == NULL) return NULL;
470 ae->handler = handler;
471 ae->private_data = private_data;
474 if (io_submit(aio_ev->ioctx, 1, &iocbp) != 1) {
/* installed only after a successful submit, so that freeing ae cancels the
   request that is now in flight */
479 talloc_set_destructor(ae, aio_destructor);
484 static const struct event_ops aio_event_ops = {
485 .context_init = aio_event_context_init,
486 .add_fd = aio_event_add_fd,
487 .add_aio = aio_event_add_aio,
488 .get_fd_flags = aio_event_get_fd_flags,
489 .set_fd_flags = aio_event_set_fd_flags,
490 .add_timed = common_event_add_timed,
491 .loop_once = aio_event_loop_once,
492 .loop_wait = aio_event_loop_wait,
495 NTSTATUS events_aio_init(void)
497 return event_register_backend("aio", &aio_event_ops);