/*
   Unix SMB/CIFS implementation.

   main select loop and event handling - epoll implementation

   Copyright (C) Andrew Tridgell 2003-2005
   Copyright (C) Stefan Metzmacher 2005

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#include "includes.h"
#include "system/filesys.h"
#include "system/network.h"
#include "lib/util/dlinklist.h"
#include "lib/events/events.h"
#include "lib/events/events_internal.h"
#include <sys/epoll.h>
/* private state for the epoll backend, hanging off the generic
   event_context via ev->additional_data */
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct event_context *ev;

	/* list of filedescriptor events */
	struct fd_event *fd_events;

	/* number of registered fd event handlers */
	int num_fd_events;

	/* this is changed by the destructors for the fd event
	   type. It is used to detect event destruction by event
	   handlers, which means the code that is calling the event
	   handler needs to assume that the linked list is no longer
	   valid
	*/
	uint32_t destruction_count;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/* the pid that created epoll_fd; epoll handles are not
	   inherited correctly across fork(), so a changed pid means
	   the handle must be recreated (see epoll_check_reopen) */
	pid_t pid;
};
56 called when a epoll call fails, and we should fallback
59 _NORETURN_ static void epoll_panic(struct epoll_event_context *epoll_ev, const char *reason)
61 DEBUG(0,("%s (%s) - calling abort()\n", reason, strerror(errno)));
66 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
68 static uint32_t epoll_map_flags(uint16_t flags)
71 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
72 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
79 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
81 close(epoll_ev->epoll_fd);
82 epoll_ev->epoll_fd = -1;
89 static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
91 epoll_ev->epoll_fd = epoll_create(64);
92 epoll_ev->pid = getpid();
93 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
94 if (epoll_ev->epoll_fd == -1) {
100 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct fd_event *fde);
103 reopen the epoll handle when our pid changes
104 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
105 demonstration of why this is needed
107 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
109 struct fd_event *fde;
111 if (epoll_ev->pid == getpid()) {
115 close(epoll_ev->epoll_fd);
116 epoll_ev->epoll_fd = epoll_create(64);
117 if (epoll_ev->epoll_fd == -1) {
118 DEBUG(0,("Failed to recreate epoll handle after fork\n"));
121 epoll_ev->pid = getpid();
122 for (fde=epoll_ev->fd_events;fde;fde=fde->next) {
123 epoll_add_event(epoll_ev, fde);
127 #define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT (1<<0)
128 #define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR (1<<1)
129 #define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR (1<<2)
132 add the epoll event to the given fd_event
134 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
136 struct epoll_event event;
138 if (epoll_ev->epoll_fd == -1) return;
140 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
142 /* if we don't want events yet, don't add an epoll_event */
143 if (fde->flags == 0) return;
146 event.events = epoll_map_flags(fde->flags);
147 event.data.ptr = fde;
148 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
149 epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed");
151 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
153 /* only if we want to read we want to tell the event handler about errors */
154 if (fde->flags & EVENT_FD_READ) {
155 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
160 delete the epoll event for given fd_event
162 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
164 struct epoll_event event;
166 DLIST_REMOVE(epoll_ev->fd_events, fde);
168 if (epoll_ev->epoll_fd == -1) return;
170 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
172 /* if there's no epoll_event, we don't need to delete it */
173 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
176 event.events = epoll_map_flags(fde->flags);
177 event.data.ptr = fde;
178 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
179 DEBUG(0,("epoll_del_event failed! probable early close bug (%s)\n", strerror(errno)));
181 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
185 change the epoll event to the given fd_event
187 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
189 struct epoll_event event;
190 if (epoll_ev->epoll_fd == -1) return;
192 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
195 event.events = epoll_map_flags(fde->flags);
196 event.data.ptr = fde;
197 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
198 epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed");
201 /* only if we want to read we want to tell the event handler about errors */
202 if (fde->flags & EVENT_FD_READ) {
203 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
207 static void epoll_change_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
209 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
210 bool want_read = (fde->flags & EVENT_FD_READ);
211 bool want_write= (fde->flags & EVENT_FD_WRITE);
213 if (epoll_ev->epoll_fd == -1) return;
215 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
217 /* there's already an event */
218 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
219 if (want_read || (want_write && !got_error)) {
220 epoll_mod_event(epoll_ev, fde);
224 * if we want to match the select behavior, we need to remove the epoll_event
225 * when the caller isn't interested in events.
227 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
229 epoll_del_event(epoll_ev, fde);
233 /* there's no epoll_event attached to the fde */
234 if (want_read || (want_write && !got_error)) {
235 DLIST_ADD(epoll_ev->fd_events, fde);
236 epoll_add_event(epoll_ev, fde);
242 event loop handling using epoll
244 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
248 struct epoll_event events[MAXEVENTS];
249 uint32_t destruction_count = ++epoll_ev->destruction_count;
252 if (epoll_ev->epoll_fd == -1) return -1;
255 /* it's better to trigger timed events a bit later than to early */
256 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
259 if (epoll_ev->ev->num_signal_handlers &&
260 common_event_check_signal(epoll_ev->ev)) {
264 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
266 if (ret == -1 && errno == EINTR && epoll_ev->ev->num_signal_handlers) {
267 if (common_event_check_signal(epoll_ev->ev)) {
272 if (ret == -1 && errno != EINTR) {
273 epoll_panic(epoll_ev, "epoll_wait() failed");
277 if (ret == 0 && tvalp) {
278 /* we don't care about a possible delay here */
279 common_event_loop_timer_delay(epoll_ev->ev);
283 for (i=0;i<ret;i++) {
284 struct fd_event *fde = talloc_get_type(events[i].data.ptr,
289 epoll_panic(epoll_ev, "epoll_wait() gave bad data");
292 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
293 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
295 * if we only wait for EVENT_FD_WRITE, we should not tell the
296 * event handler about it, and remove the epoll_event,
297 * as we only report errors when waiting for read events,
298 * to match the select() behavior
300 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
301 epoll_del_event(epoll_ev, fde);
304 flags |= EVENT_FD_READ;
306 if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
307 if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
309 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
310 if (destruction_count != epoll_ev->destruction_count) {
320 create a epoll_event_context structure.
322 static int epoll_event_context_init(struct event_context *ev)
325 struct epoll_event_context *epoll_ev;
327 epoll_ev = talloc_zero(ev, struct epoll_event_context);
328 if (!epoll_ev) return -1;
330 epoll_ev->epoll_fd = -1;
332 ret = epoll_init_ctx(epoll_ev);
334 talloc_free(epoll_ev);
338 ev->additional_data = epoll_ev;
345 static int epoll_event_fd_destructor(struct fd_event *fde)
347 struct event_context *ev = fde->event_ctx;
348 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
349 struct epoll_event_context);
351 epoll_check_reopen(epoll_ev);
353 epoll_ev->num_fd_events--;
354 epoll_ev->destruction_count++;
356 epoll_del_event(epoll_ev, fde);
358 if (fde->flags & EVENT_FD_AUTOCLOSE) {
368 return NULL on failure (memory allocation error)
370 static struct fd_event *epoll_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
371 int fd, uint16_t flags,
372 event_fd_handler_t handler,
375 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
376 struct epoll_event_context);
377 struct fd_event *fde;
379 epoll_check_reopen(epoll_ev);
381 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
382 if (!fde) return NULL;
387 fde->handler = handler;
388 fde->private_data = private_data;
389 fde->additional_flags = 0;
390 fde->additional_data = NULL;
392 epoll_ev->num_fd_events++;
393 talloc_set_destructor(fde, epoll_event_fd_destructor);
395 DLIST_ADD(epoll_ev->fd_events, fde);
396 epoll_add_event(epoll_ev, fde);
403 return the fd event flags
405 static uint16_t epoll_event_get_fd_flags(struct fd_event *fde)
411 set the fd event flags
413 static void epoll_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
415 struct event_context *ev;
416 struct epoll_event_context *epoll_ev;
418 if (fde->flags == flags) return;
421 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
425 epoll_check_reopen(epoll_ev);
427 epoll_change_event(epoll_ev, fde);
431 do a single event loop using the events defined in ev
433 static int epoll_event_loop_once(struct event_context *ev)
435 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
436 struct epoll_event_context);
439 tval = common_event_loop_timer_delay(ev);
440 if (timeval_is_zero(&tval)) {
444 epoll_check_reopen(epoll_ev);
446 return epoll_event_loop(epoll_ev, &tval);
450 return on failure or (with 0) if all fd events are removed
452 static int epoll_event_loop_wait(struct event_context *ev)
454 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
455 struct epoll_event_context);
456 while (epoll_ev->num_fd_events) {
457 if (epoll_event_loop_once(ev) != 0) {
465 static const struct event_ops epoll_event_ops = {
466 .context_init = epoll_event_context_init,
467 .add_fd = epoll_event_add_fd,
468 .get_fd_flags = epoll_event_get_fd_flags,
469 .set_fd_flags = epoll_event_set_fd_flags,
470 .add_timed = common_event_add_timed,
471 .add_signal = common_event_add_signal,
472 .loop_once = epoll_event_loop_once,
473 .loop_wait = epoll_event_loop_wait,
476 bool events_epoll_init(void)
478 return event_register_backend("epoll", &epoll_event_ops);
482 NTSTATUS s4_events_epoll_init(void)
484 if (!events_epoll_init()) {
485 return NT_STATUS_INTERNAL_ERROR;