/*
   Unix SMB/CIFS implementation.

   main select loop and event handling - epoll implementation

   Copyright (C) Andrew Tridgell 2003-2005
   Copyright (C) Stefan Metzmacher 2005

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#include "system/filesys.h"
#include "system/network.h"
#include "lib/util/dlinklist.h"
#include "lib/events/events.h"
#include "lib/events/events_internal.h"
#include <sys/epoll.h>
struct epoll_event_context {
	/* a pointer back to the generic event_context */
	struct event_context *ev;

	/* list of filedescriptor events */
	struct fd_event *fd_events;

	/* number of registered fd event handlers */
	int num_fd_events;

	/* this is changed by the destructors for the fd event
	   type. It is used to detect event destruction by event
	   handlers, which means the code that is calling the event
	   handler needs to assume that the linked list is no longer
	   valid
	*/
	uint32_t destruction_count;

	/* when using epoll this is the handle from epoll_create */
	int epoll_fd;

	/* the pid at the time the epoll_fd was created - used to
	   detect a fork so the handle can be reopened (see
	   epoll_check_reopen) */
	pid_t pid;
};
56 called when a epoll call fails, and we should fallback
59 static void epoll_fallback_to_select(struct epoll_event_context *epoll_ev, const char *reason)
61 DEBUG(0,("%s (%s) - falling back to select()\n", reason, strerror(errno)));
62 close(epoll_ev->epoll_fd);
63 epoll_ev->epoll_fd = -1;
64 talloc_set_destructor(epoll_ev, NULL);
68 map from EVENT_FD_* to EPOLLIN/EPOLLOUT
70 static uint32_t epoll_map_flags(uint16_t flags)
73 if (flags & EVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
74 if (flags & EVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
81 static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
83 close(epoll_ev->epoll_fd);
84 epoll_ev->epoll_fd = -1;
91 static void epoll_init_ctx(struct epoll_event_context *epoll_ev)
93 epoll_ev->epoll_fd = epoll_create(64);
94 epoll_ev->pid = getpid();
95 talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
98 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct fd_event *fde);
101 reopen the epoll handle when our pid changes
102 see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
103 demonstration of why this is needed
105 static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
107 struct fd_event *fde;
109 if (epoll_ev->pid == getpid()) {
113 close(epoll_ev->epoll_fd);
114 epoll_ev->epoll_fd = epoll_create(64);
115 if (epoll_ev->epoll_fd == -1) {
116 DEBUG(0,("Failed to recreate epoll handle after fork\n"));
119 epoll_ev->pid = getpid();
120 for (fde=epoll_ev->fd_events;fde;fde=fde->next) {
121 epoll_add_event(epoll_ev, fde);
/* bits stored in fde->additional_flags to track per-fd epoll state */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
130 add the epoll event to the given fd_event
132 static void epoll_add_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
134 struct epoll_event event;
136 if (epoll_ev->epoll_fd == -1) return;
138 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
140 /* if we don't want events yet, don't add an epoll_event */
141 if (fde->flags == 0) return;
144 event.events = epoll_map_flags(fde->flags);
145 event.data.ptr = fde;
146 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event) != 0) {
147 epoll_fallback_to_select(epoll_ev, "EPOLL_CTL_ADD failed");
149 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
151 /* only if we want to read we want to tell the event handler about errors */
152 if (fde->flags & EVENT_FD_READ) {
153 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
158 delete the epoll event for given fd_event
160 static void epoll_del_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
162 struct epoll_event event;
164 DLIST_REMOVE(epoll_ev->fd_events, fde);
166 if (epoll_ev->epoll_fd == -1) return;
168 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
170 /* if there's no epoll_event, we don't need to delete it */
171 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) return;
174 event.events = epoll_map_flags(fde->flags);
175 event.data.ptr = fde;
176 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event) != 0) {
177 DEBUG(0,("epoll_del_event failed! probable early close bug (%s)\n", strerror(errno)));
179 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
183 change the epoll event to the given fd_event
185 static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
187 struct epoll_event event;
188 if (epoll_ev->epoll_fd == -1) return;
190 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
193 event.events = epoll_map_flags(fde->flags);
194 event.data.ptr = fde;
195 if (epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event) != 0) {
196 epoll_fallback_to_select(epoll_ev, "EPOLL_CTL_MOD failed");
199 /* only if we want to read we want to tell the event handler about errors */
200 if (fde->flags & EVENT_FD_READ) {
201 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
205 static void epoll_change_event(struct epoll_event_context *epoll_ev, struct fd_event *fde)
207 bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
208 bool want_read = (fde->flags & EVENT_FD_READ);
209 bool want_write= (fde->flags & EVENT_FD_WRITE);
211 if (epoll_ev->epoll_fd == -1) return;
213 fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
215 /* there's already an event */
216 if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
217 if (want_read || (want_write && !got_error)) {
218 epoll_mod_event(epoll_ev, fde);
222 * if we want to match the select behavior, we need to remove the epoll_event
223 * when the caller isn't interested in events.
225 * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
227 epoll_del_event(epoll_ev, fde);
231 /* there's no epoll_event attached to the fde */
232 if (want_read || (want_write && !got_error)) {
233 DLIST_ADD(epoll_ev->fd_events, fde);
234 epoll_add_event(epoll_ev, fde);
240 event loop handling using epoll
242 static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
246 struct epoll_event events[MAXEVENTS];
247 uint32_t destruction_count = ++epoll_ev->destruction_count;
250 if (epoll_ev->epoll_fd == -1) return -1;
253 /* it's better to trigger timed events a bit later than to early */
254 timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
257 if (epoll_ev->ev->num_signal_handlers &&
258 common_event_check_signal(epoll_ev->ev)) {
262 ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
264 if (ret == -1 && errno == EINTR && epoll_ev->ev->num_signal_handlers) {
265 if (common_event_check_signal(epoll_ev->ev)) {
270 if (ret == -1 && errno != EINTR) {
271 epoll_fallback_to_select(epoll_ev, "epoll_wait() failed");
275 if (ret == 0 && tvalp) {
276 /* we don't care about a possible delay here */
277 common_event_loop_timer_delay(epoll_ev->ev);
281 for (i=0;i<ret;i++) {
282 struct fd_event *fde = talloc_get_type(events[i].data.ptr,
287 epoll_fallback_to_select(epoll_ev, "epoll_wait() gave bad data");
290 if (events[i].events & (EPOLLHUP|EPOLLERR)) {
291 fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
293 * if we only wait for EVENT_FD_WRITE, we should not tell the
294 * event handler about it, and remove the epoll_event,
295 * as we only report errors when waiting for read events,
296 * to match the select() behavior
298 if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
299 epoll_del_event(epoll_ev, fde);
302 flags |= EVENT_FD_READ;
304 if (events[i].events & EPOLLIN) flags |= EVENT_FD_READ;
305 if (events[i].events & EPOLLOUT) flags |= EVENT_FD_WRITE;
307 fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
308 if (destruction_count != epoll_ev->destruction_count) {
318 create a epoll_event_context structure.
320 static int epoll_event_context_init(struct event_context *ev)
322 struct epoll_event_context *epoll_ev;
324 epoll_ev = talloc_zero(ev, struct epoll_event_context);
325 if (!epoll_ev) return -1;
327 epoll_ev->epoll_fd = -1;
329 epoll_init_ctx(epoll_ev);
331 ev->additional_data = epoll_ev;
338 static int epoll_event_fd_destructor(struct fd_event *fde)
340 struct event_context *ev = fde->event_ctx;
341 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
342 struct epoll_event_context);
344 epoll_check_reopen(epoll_ev);
346 epoll_ev->num_fd_events--;
347 epoll_ev->destruction_count++;
349 epoll_del_event(epoll_ev, fde);
351 if (fde->flags & EVENT_FD_AUTOCLOSE) {
361 return NULL on failure (memory allocation error)
363 static struct fd_event *epoll_event_add_fd(struct event_context *ev, TALLOC_CTX *mem_ctx,
364 int fd, uint16_t flags,
365 event_fd_handler_t handler,
368 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
369 struct epoll_event_context);
370 struct fd_event *fde;
372 epoll_check_reopen(epoll_ev);
374 fde = talloc(mem_ctx?mem_ctx:ev, struct fd_event);
375 if (!fde) return NULL;
380 fde->handler = handler;
381 fde->private_data = private_data;
382 fde->additional_flags = 0;
383 fde->additional_data = NULL;
385 epoll_ev->num_fd_events++;
386 talloc_set_destructor(fde, epoll_event_fd_destructor);
388 DLIST_ADD(epoll_ev->fd_events, fde);
389 epoll_add_event(epoll_ev, fde);
396 return the fd event flags
398 static uint16_t epoll_event_get_fd_flags(struct fd_event *fde)
404 set the fd event flags
406 static void epoll_event_set_fd_flags(struct fd_event *fde, uint16_t flags)
408 struct event_context *ev;
409 struct epoll_event_context *epoll_ev;
411 if (fde->flags == flags) return;
414 epoll_ev = talloc_get_type(ev->additional_data, struct epoll_event_context);
418 epoll_check_reopen(epoll_ev);
420 epoll_change_event(epoll_ev, fde);
424 do a single event loop using the events defined in ev
426 static int epoll_event_loop_once(struct event_context *ev)
428 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
429 struct epoll_event_context);
432 tval = common_event_loop_timer_delay(ev);
433 if (timeval_is_zero(&tval)) {
437 epoll_check_reopen(epoll_ev);
439 return epoll_event_loop(epoll_ev, &tval);
443 return on failure or (with 0) if all fd events are removed
445 static int epoll_event_loop_wait(struct event_context *ev)
447 struct epoll_event_context *epoll_ev = talloc_get_type(ev->additional_data,
448 struct epoll_event_context);
449 while (epoll_ev->num_fd_events) {
450 if (epoll_event_loop_once(ev) != 0) {
458 static const struct event_ops epoll_event_ops = {
459 .context_init = epoll_event_context_init,
460 .add_fd = epoll_event_add_fd,
461 .get_fd_flags = epoll_event_get_fd_flags,
462 .set_fd_flags = epoll_event_set_fd_flags,
463 .add_timed = common_event_add_timed,
464 .add_signal = common_event_add_signal,
465 .loop_once = epoll_event_loop_once,
466 .loop_wait = epoll_event_loop_wait,
469 bool events_epoll_init(void)
471 return event_register_backend("epoll", &epoll_event_ops);
475 NTSTATUS s4_events_epoll_init(void)
477 if (!events_epoll_init()) {
478 return NT_STATUS_INTERNAL_ERROR;