s3: Add sys_poll_intr
[samba.git] / lib / util / select.c
1 /*
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    Samba select/poll implementation
5    Copyright (C) Andrew Tridgell 1992-1998
6
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "includes.h"
22 #include "system/filesys.h"
23 #include "system/select.h"
24 #include "lib/util/select.h"
25
/* This is here because it allows us to avoid a nasty race in signal handling.
   We need to guarantee that when we get a signal we get out of a select
   immediately, but doing that naively involves a race condition. We can avoid
   the race by getting the signal handler to write to a pipe that is in the
   select/poll list.

   This means all Samba signal handlers should call sys_select_signal().
*/
33
/* PID recorded once the signal pipe has been set up; 0 until then. */
static pid_t initialised;

/* The signal pipe: [0] is the end that gets read and polled, [1] is the
   end that sys_select_signal() writes to. */
static int select_pipe[2];

/* Number of bytes successfully written to the pipe by sys_select_signal(). */
static volatile unsigned int pipe_written;

/* Number of bytes successfully read back out of the pipe. */
static volatile unsigned int pipe_read;
37
38 /*******************************************************************
39  Call this from all Samba signal handlers if you want to avoid a
40  nasty signal race condition.
41 ********************************************************************/
42
43 void sys_select_signal(char c)
44 {
45         int saved_errno = errno;
46
47         if (!initialised) return;
48
49         if (pipe_written > pipe_read+256) return;
50
51         if (write(select_pipe[1], &c, 1) == 1) pipe_written++;
52
53         errno = saved_errno;
54 }
55
56 /*******************************************************************
57  Like select() but avoids the signal race using a pipe
58  it also guuarantees that fds on return only ever contains bits set
59  for file descriptors that were readable.
60 ********************************************************************/
61
62 int sys_select(int maxfd, fd_set *readfds, fd_set *writefds, fd_set *errorfds, struct timeval *tval)
63 {
64         int ret, saved_errno;
65         fd_set *readfds2, readfds_buf;
66
67         if (initialised != sys_getpid()) {
68                 if (pipe(select_pipe) == -1)
69                 {
70                         DEBUG(0, ("sys_select: pipe failed (%s)\n",
71                                 strerror(errno)));
72                         if (readfds != NULL)
73                                 FD_ZERO(readfds);
74                         if (writefds != NULL)
75                                 FD_ZERO(writefds);
76                         if (errorfds != NULL)
77                                 FD_ZERO(errorfds);
78                         return -1;
79                 }
80
81                 /*
82                  * These next two lines seem to fix a bug with the Linux
83                  * 2.0.x kernel (and probably other UNIXes as well) where
84                  * the one byte read below can block even though the
85                  * select returned that there is data in the pipe and
86                  * the pipe_written variable was incremented. Thanks to
87                  * HP for finding this one. JRA.
88                  */
89
90                 if(set_blocking(select_pipe[0],0)==-1)
91                         smb_panic("select_pipe[0]: O_NONBLOCK failed");
92                 if(set_blocking(select_pipe[1],0)==-1)
93                         smb_panic("select_pipe[1]: O_NONBLOCK failed");
94
95                 initialised = sys_getpid();
96         }
97
98         maxfd = MAX(select_pipe[0]+1, maxfd);
99
100         /* If readfds is NULL we need to provide our own set. */
101         if (readfds) {
102                 readfds2 = readfds;
103         } else {
104                 readfds2 = &readfds_buf;
105                 FD_ZERO(readfds2);
106         }
107         FD_SET(select_pipe[0], readfds2);
108
109         errno = 0;
110         ret = select(maxfd,readfds2,writefds,errorfds,tval);
111
112         if (ret <= 0) {
113                 FD_ZERO(readfds2);
114                 if (writefds)
115                         FD_ZERO(writefds);
116                 if (errorfds)
117                         FD_ZERO(errorfds);
118         } else if (FD_ISSET(select_pipe[0], readfds2)) {
119                 char c;
120                 saved_errno = errno;
121                 if (read(select_pipe[0], &c, 1) == 1) {
122                         pipe_read++;
123                         /* Mark Weaver <mark-clist@npsl.co.uk> pointed out a critical
124                            fix to ensure we don't lose signals. We must always
125                            return -1 when the select pipe is set, otherwise if another
126                            fd is also ready (so ret == 2) then we used to eat the
127                            byte in the pipe and lose the signal. JRA.
128                         */
129                         ret = -1;
130 #if 0
131                         /* JRA - we can use this to debug the signal messaging... */
132                         DEBUG(0,("select got %u signal\n", (unsigned int)c));
133 #endif
134                         errno = EINTR;
135                 } else {
136                         FD_CLR(select_pipe[0], readfds2);
137                         ret--;
138                         errno = saved_errno;
139                 }
140         }
141
142         return ret;
143 }
144
/*******************************************************************
 Similar to sys_select() but catch EINTR and continue.
 This is what sys_select() used to do in Samba.

 The caller's fd sets are only used as input on each attempt; the
 result of the final attempt is copied back into them on return.
 If tval is given, the time still left is recomputed after every
 interruption, and 0 is returned with all supplied sets cleared
 once that time has run out. The struct tval points to is not
 modified.
********************************************************************/

int sys_select_intr(int maxfd, fd_set *readfds, fd_set *writefds, fd_set *errorfds, struct timeval *tval)
{
        int ret;
        fd_set *readfds2, readfds_buf, *writefds2, writefds_buf, *errorfds2, errorfds_buf;
        struct timeval tval2, *ptval;
        struct timespec end_time;

        /* select() works on scratch copies so that the caller's sets
           can be re-used as input after an interruption. */
        readfds2 = (readfds ? &readfds_buf : NULL);
        writefds2 = (writefds ? &writefds_buf : NULL);
        errorfds2 = (errorfds ? &errorfds_buf : NULL);
        if (tval) {
                /* Turn the relative timeout into an absolute deadline. */
                clock_gettime_mono(&end_time);
                end_time.tv_sec += tval->tv_sec;
                end_time.tv_nsec += tval->tv_usec *1000;
                end_time.tv_sec += end_time.tv_nsec / 1000000000;
                end_time.tv_nsec %= 1000000000;
                /* Make sure a stale EINTR is not mistaken for an
                   interruption on the first pass through the loop. */
                errno = 0;
                tval2 = *tval;
                ptval = &tval2;
        } else {
                ptval = NULL;
        }

        do {
                if (readfds)
                        readfds_buf = *readfds;
                if (writefds)
                        writefds_buf = *writefds;
                if (errorfds)
                        errorfds_buf = *errorfds;
                if (ptval && (errno == EINTR)) {
                        struct timespec now_time;
                        int64_t tdif;

                        clock_gettime_mono(&now_time);
                        tdif = nsec_time_diff(&end_time,&now_time);
                        if (tdif <= 0) {
                                /* Time expired. The scratch sets still
                                   hold the caller's input bits; clear
                                   them so the caller sees empty sets,
                                   as after a select() timeout. */
                                if (readfds2)
                                        FD_ZERO(readfds2);
                                if (writefds2)
                                        FD_ZERO(writefds2);
                                if (errorfds2)
                                        FD_ZERO(errorfds2);
                                ret = 0;
                                break;
                        }
                        ptval->tv_sec = tdif / 1000000000;
                        ptval->tv_usec = (tdif % 1000000000) / 1000;
                }

                /* We must use select and not sys_select here. If we use
                   sys_select we'd lose the fact a signal occurred when sys_select
                   read a byte from the pipe. Fix from Mark Weaver
                   <mark-clist@npsl.co.uk>
                */
                ret = select(maxfd, readfds2, writefds2, errorfds2, ptval);
        } while (ret == -1 && errno == EINTR);

        if (readfds)
                *readfds = readfds_buf;
        if (writefds)
                *writefds = writefds_buf;
        if (errorfds)
                *errorfds = errorfds_buf;

        return ret;
}
211
212 /*
213  * sys_poll expects pollfd's to be a talloc'ed array.
214  *
215  * It expects the talloc_array_length(fds) >= num_fds+1 to give space
216  * to the signal pipe.
217  */
218
219 int sys_poll(struct pollfd *fds, int num_fds, int timeout)
220 {
221         int ret;
222
223         if (talloc_array_length(fds) < num_fds+1) {
224                 errno = ENOSPC;
225                 return -1;
226         }
227
228         if (initialised != sys_getpid()) {
229                 if (pipe(select_pipe) == -1)
230                 {
231                         int saved_errno = errno;
232                         DEBUG(0, ("sys_poll: pipe failed (%s)\n",
233                                 strerror(errno)));
234                         errno = saved_errno;
235                         return -1;
236                 }
237
238                 /*
239                  * These next two lines seem to fix a bug with the Linux
240                  * 2.0.x kernel (and probably other UNIXes as well) where
241                  * the one byte read below can block even though the
242                  * select returned that there is data in the pipe and
243                  * the pipe_written variable was incremented. Thanks to
244                  * HP for finding this one. JRA.
245                  */
246
247                 if(set_blocking(select_pipe[0],0)==-1)
248                         smb_panic("select_pipe[0]: O_NONBLOCK failed");
249                 if(set_blocking(select_pipe[1],0)==-1)
250                         smb_panic("select_pipe[1]: O_NONBLOCK failed");
251
252                 initialised = sys_getpid();
253         }
254
255         ZERO_STRUCT(fds[num_fds]);
256         fds[num_fds].fd = select_pipe[0];
257         fds[num_fds].events = POLLIN|POLLHUP;
258
259         errno = 0;
260         ret = poll(fds, num_fds+1, timeout);
261
262         if ((ret >= 0) && (fds[num_fds].revents & (POLLIN|POLLHUP|POLLERR))) {
263                 char c;
264                 int saved_errno = errno;
265
266                 if (read(select_pipe[0], &c, 1) == 1) {
267                         pipe_read += 1;
268
269                         /* Mark Weaver <mark-clist@npsl.co.uk> pointed out a critical
270                            fix to ensure we don't lose signals. We must always
271                            return -1 when the select pipe is set, otherwise if another
272                            fd is also ready (so ret == 2) then we used to eat the
273                            byte in the pipe and lose the signal. JRA.
274                         */
275                         ret = -1;
276 #if 0
277                         /* JRA - we can use this to debug the signal messaging... */
278                         DEBUG(0,("select got %u signal\n", (unsigned int)c));
279 #endif
280                         errno = EINTR;
281                 } else {
282                         ret -= 1;
283                         errno = saved_errno;
284                 }
285         }
286
287         return ret;
288 }
289
/*
 * Like poll(), but restart the call when it is interrupted (EINTR).
 *
 * timeout is in milliseconds, as for poll(); a negative value means
 * wait without limit. After an interruption a non-negative timeout is
 * reduced by the time already spent since the first call, so the total
 * wait does not grow with the number of interruptions.
 *
 * Returns whatever the final poll() call returned.
 */
int sys_poll_intr(struct pollfd *fds, int num_fds, int timeout)
{
        int orig_timeout = timeout;
        struct timespec start;
        int ret;

        clock_gettime_mono(&start);

        while (true) {
                struct timespec now;
                int64_t elapsed_ms;

                ret = poll(fds, num_fds, timeout);
                if (ret != -1) {
                        break;
                }
                if (errno != EINTR) {
                        break;
                }

                /* Infinite timeout, nothing to adjust. */
                if (orig_timeout < 0) {
                        continue;
                }

                clock_gettime_mono(&now);
                /* nsec_time_diff() yields nanoseconds (going by its
                   name); poll() wants milliseconds. */
                elapsed_ms = nsec_time_diff(&now, &start) / 1000000;
                if (elapsed_ms < 0) {
                        elapsed_ms = 0;
                }

                /* Never let the remaining time go negative: poll() would
                   treat that as "wait forever". */
                if (elapsed_ms >= orig_timeout) {
                        timeout = 0;
                } else {
                        timeout = orig_timeout - (int)elapsed_ms;
                }
        }
        return ret;
}
313         return ret;
314 }