Annotation of embedaddon/libevent/epoll.c, revision 1.1.1.1
1.1 misho 1: /*
2: * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
3: * All rights reserved.
4: *
5: * Redistribution and use in source and binary forms, with or without
6: * modification, are permitted provided that the following conditions
7: * are met:
8: * 1. Redistributions of source code must retain the above copyright
9: * notice, this list of conditions and the following disclaimer.
10: * 2. Redistributions in binary form must reproduce the above copyright
11: * notice, this list of conditions and the following disclaimer in the
12: * documentation and/or other materials provided with the distribution.
13: * 3. The name of the author may not be used to endorse or promote products
14: * derived from this software without specific prior written permission.
15: *
16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17: * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18: * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19: * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22: * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23: * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25: * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26: */
27: #ifdef HAVE_CONFIG_H
28: #include "config.h"
29: #endif
30:
31: #include <stdint.h>
32: #include <sys/types.h>
33: #include <sys/resource.h>
34: #ifdef HAVE_SYS_TIME_H
35: #include <sys/time.h>
36: #else
37: #include <sys/_libevent_time.h>
38: #endif
39: #include <sys/queue.h>
40: #include <sys/epoll.h>
41: #include <signal.h>
42: #include <stdio.h>
43: #include <stdlib.h>
44: #include <string.h>
45: #include <unistd.h>
46: #include <errno.h>
47: #ifdef HAVE_FCNTL_H
48: #include <fcntl.h>
49: #endif
50:
51: #include "event.h"
52: #include "event-internal.h"
53: #include "evsignal.h"
54: #include "log.h"
55:
/*
 * Due to limitations in the epoll interface, we need to keep track of
 * all file descriptors ourselves.  One of these slots exists per file
 * descriptor; the backend indexes the slot array by descriptor number.
 */
struct evepoll {
	struct event *evread;	/* event registered for reading, or NULL */
	struct event *evwrite;	/* event registered for writing, or NULL */
};
63:
/* State of one epoll backend instance, as returned by epoll_init(). */
struct epollop {
	struct evepoll *fds;		/* per-descriptor slots, indexed by fd */
	int nfds;			/* number of slots in fds */
	struct epoll_event *events;	/* result buffer handed to epoll_wait() */
	int nevents;			/* capacity of events, in entries */
	int epfd;			/* descriptor obtained from epoll_create() */
};
71:
72: static void *epoll_init (struct event_base *);
73: static int epoll_add (void *, struct event *);
74: static int epoll_del (void *, struct event *);
75: static int epoll_dispatch (struct event_base *, void *, struct timeval *);
76: static void epoll_dealloc (struct event_base *, void *);
77:
78: const struct eventop epollops = {
79: "epoll",
80: epoll_init,
81: epoll_add,
82: epoll_del,
83: epoll_dispatch,
84: epoll_dealloc,
85: 1 /* need reinit */
86: };
87:
/*
 * FD_CLOSEONEXEC(x): set the close-on-exec flag on descriptor x, logging a
 * warning through event_warn() if fcntl() fails.  When HAVE_SETFD is not
 * defined the macro expands to a no-op expression.
 *
 * Note that x is evaluated a second time on the failure path, so it must
 * not have side effects.
 */
#ifdef HAVE_SETFD
#define FD_CLOSEONEXEC(x) do { \
	if (fcntl((x), F_SETFD, FD_CLOEXEC) == -1) \
		event_warn("fcntl(%d, F_SETFD)", (x)); \
} while (0)
#else
#define FD_CLOSEONEXEC(x) ((void)0)
#endif
96:
/*
 * On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
 * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be as big
 * as 1000, and LONG_MAX can be as small as (1<<31)-1, so the largest number
 * of msec we can support here is 2147482.  Round that down by 47 seconds,
 * which gives 35 minutes.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35 * 60 * 1000)

/* Number of per-descriptor slots allocated by epoll_init(). */
#define INITIAL_NFILES 32
/* Initial capacity of the epoll_wait() result buffer. */
#define INITIAL_NEVENTS 32
/* The result buffer is no longer doubled once it has this many entries. */
#define MAX_NEVENTS 4096
108:
109: static void *
110: epoll_init(struct event_base *base)
111: {
112: int epfd;
113: struct epollop *epollop;
114:
115: /* Disable epollueue when this environment variable is set */
116: if (evutil_getenv("EVENT_NOEPOLL"))
117: return (NULL);
118:
119: /* Initalize the kernel queue */
120: if ((epfd = epoll_create(32000)) == -1) {
121: if (errno != ENOSYS)
122: event_warn("epoll_create");
123: return (NULL);
124: }
125:
126: FD_CLOSEONEXEC(epfd);
127:
128: if (!(epollop = calloc(1, sizeof(struct epollop))))
129: return (NULL);
130:
131: epollop->epfd = epfd;
132:
133: /* Initalize fields */
134: epollop->events = malloc(INITIAL_NEVENTS * sizeof(struct epoll_event));
135: if (epollop->events == NULL) {
136: free(epollop);
137: return (NULL);
138: }
139: epollop->nevents = INITIAL_NEVENTS;
140:
141: epollop->fds = calloc(INITIAL_NFILES, sizeof(struct evepoll));
142: if (epollop->fds == NULL) {
143: free(epollop->events);
144: free(epollop);
145: return (NULL);
146: }
147: epollop->nfds = INITIAL_NFILES;
148:
149: evsignal_init(base);
150:
151: return (epollop);
152: }
153:
154: static int
155: epoll_recalc(struct event_base *base, void *arg, int max)
156: {
157: struct epollop *epollop = arg;
158:
159: if (max >= epollop->nfds) {
160: struct evepoll *fds;
161: int nfds;
162:
163: nfds = epollop->nfds;
164: while (nfds <= max)
165: nfds <<= 1;
166:
167: fds = realloc(epollop->fds, nfds * sizeof(struct evepoll));
168: if (fds == NULL) {
169: event_warn("realloc");
170: return (-1);
171: }
172: epollop->fds = fds;
173: memset(fds + epollop->nfds, 0,
174: (nfds - epollop->nfds) * sizeof(struct evepoll));
175: epollop->nfds = nfds;
176: }
177:
178: return (0);
179: }
180:
181: static int
182: epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
183: {
184: struct epollop *epollop = arg;
185: struct epoll_event *events = epollop->events;
186: struct evepoll *evep;
187: int i, res, timeout = -1;
188:
189: if (tv != NULL)
190: timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
191:
192: if (timeout > MAX_EPOLL_TIMEOUT_MSEC) {
193: /* Linux kernels can wait forever if the timeout is too big;
194: * see comment on MAX_EPOLL_TIMEOUT_MSEC. */
195: timeout = MAX_EPOLL_TIMEOUT_MSEC;
196: }
197:
198: res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
199:
200: if (res == -1) {
201: if (errno != EINTR) {
202: event_warn("epoll_wait");
203: return (-1);
204: }
205:
206: evsignal_process(base);
207: return (0);
208: } else if (base->sig.evsignal_caught) {
209: evsignal_process(base);
210: }
211:
212: event_debug(("%s: epoll_wait reports %d", __func__, res));
213:
214: for (i = 0; i < res; i++) {
215: int what = events[i].events;
216: struct event *evread = NULL, *evwrite = NULL;
217: int fd = events[i].data.fd;
218:
219: if (fd < 0 || fd >= epollop->nfds)
220: continue;
221: evep = &epollop->fds[fd];
222:
223: if (what & (EPOLLHUP|EPOLLERR)) {
224: evread = evep->evread;
225: evwrite = evep->evwrite;
226: } else {
227: if (what & EPOLLIN) {
228: evread = evep->evread;
229: }
230:
231: if (what & EPOLLOUT) {
232: evwrite = evep->evwrite;
233: }
234: }
235:
236: if (!(evread||evwrite))
237: continue;
238:
239: if (evread != NULL)
240: event_active(evread, EV_READ, 1);
241: if (evwrite != NULL)
242: event_active(evwrite, EV_WRITE, 1);
243: }
244:
245: if (res == epollop->nevents && epollop->nevents < MAX_NEVENTS) {
246: /* We used all of the event space this time. We should
247: be ready for more events next time. */
248: int new_nevents = epollop->nevents * 2;
249: struct epoll_event *new_events;
250:
251: new_events = realloc(epollop->events,
252: new_nevents * sizeof(struct epoll_event));
253: if (new_events) {
254: epollop->events = new_events;
255: epollop->nevents = new_nevents;
256: }
257: }
258:
259: return (0);
260: }
261:
262:
263: static int
264: epoll_add(void *arg, struct event *ev)
265: {
266: struct epollop *epollop = arg;
267: struct epoll_event epev = {0, {0}};
268: struct evepoll *evep;
269: int fd, op, events;
270:
271: if (ev->ev_events & EV_SIGNAL)
272: return (evsignal_add(ev));
273:
274: fd = ev->ev_fd;
275: if (fd >= epollop->nfds) {
276: /* Extent the file descriptor array as necessary */
277: if (epoll_recalc(ev->ev_base, epollop, fd) == -1)
278: return (-1);
279: }
280: evep = &epollop->fds[fd];
281: op = EPOLL_CTL_ADD;
282: events = 0;
283: if (evep->evread != NULL) {
284: events |= EPOLLIN;
285: op = EPOLL_CTL_MOD;
286: }
287: if (evep->evwrite != NULL) {
288: events |= EPOLLOUT;
289: op = EPOLL_CTL_MOD;
290: }
291:
292: if (ev->ev_events & EV_READ)
293: events |= EPOLLIN;
294: if (ev->ev_events & EV_WRITE)
295: events |= EPOLLOUT;
296:
297: epev.data.fd = fd;
298: epev.events = events;
299: if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1)
300: return (-1);
301:
302: /* Update events responsible */
303: if (ev->ev_events & EV_READ)
304: evep->evread = ev;
305: if (ev->ev_events & EV_WRITE)
306: evep->evwrite = ev;
307:
308: return (0);
309: }
310:
311: static int
312: epoll_del(void *arg, struct event *ev)
313: {
314: struct epollop *epollop = arg;
315: struct epoll_event epev = {0, {0}};
316: struct evepoll *evep;
317: int fd, events, op;
318: int needwritedelete = 1, needreaddelete = 1;
319:
320: if (ev->ev_events & EV_SIGNAL)
321: return (evsignal_del(ev));
322:
323: fd = ev->ev_fd;
324: if (fd >= epollop->nfds)
325: return (0);
326: evep = &epollop->fds[fd];
327:
328: op = EPOLL_CTL_DEL;
329: events = 0;
330:
331: if (ev->ev_events & EV_READ)
332: events |= EPOLLIN;
333: if (ev->ev_events & EV_WRITE)
334: events |= EPOLLOUT;
335:
336: if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) {
337: if ((events & EPOLLIN) && evep->evwrite != NULL) {
338: needwritedelete = 0;
339: events = EPOLLOUT;
340: op = EPOLL_CTL_MOD;
341: } else if ((events & EPOLLOUT) && evep->evread != NULL) {
342: needreaddelete = 0;
343: events = EPOLLIN;
344: op = EPOLL_CTL_MOD;
345: }
346: }
347:
348: epev.events = events;
349: epev.data.fd = fd;
350:
351: if (needreaddelete)
352: evep->evread = NULL;
353: if (needwritedelete)
354: evep->evwrite = NULL;
355:
356: if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1)
357: return (-1);
358:
359: return (0);
360: }
361:
362: static void
363: epoll_dealloc(struct event_base *base, void *arg)
364: {
365: struct epollop *epollop = arg;
366:
367: evsignal_dealloc(base);
368: if (epollop->fds)
369: free(epollop->fds);
370: if (epollop->events)
371: free(epollop->events);
372: if (epollop->epfd >= 0)
373: close(epollop->epfd);
374:
375: memset(epollop, 0, sizeof(struct epollop));
376: free(epollop);
377: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>