Annotation of embedaddon/libevent/epoll.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
                      3:  * All rights reserved.
                      4:  *
                      5:  * Redistribution and use in source and binary forms, with or without
                      6:  * modification, are permitted provided that the following conditions
                      7:  * are met:
                      8:  * 1. Redistributions of source code must retain the above copyright
                      9:  *    notice, this list of conditions and the following disclaimer.
                     10:  * 2. Redistributions in binary form must reproduce the above copyright
                     11:  *    notice, this list of conditions and the following disclaimer in the
                     12:  *    documentation and/or other materials provided with the distribution.
                     13:  * 3. The name of the author may not be used to endorse or promote products
                     14:  *    derived from this software without specific prior written permission.
                     15:  *
                     16:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
                     17:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
                     18:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
                     19:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
                     20:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
                     21:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
                     22:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
                     23:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
                     24:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
                     25:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     26:  */
                     27: #ifdef HAVE_CONFIG_H
                     28: #include "config.h"
                     29: #endif
                     30: 
                     31: #include <stdint.h>
                     32: #include <sys/types.h>
                     33: #include <sys/resource.h>
                     34: #ifdef HAVE_SYS_TIME_H
                     35: #include <sys/time.h>
                     36: #else
                     37: #include <sys/_libevent_time.h>
                     38: #endif
                     39: #include <sys/queue.h>
                     40: #include <sys/epoll.h>
                     41: #include <signal.h>
                     42: #include <stdio.h>
                     43: #include <stdlib.h>
                     44: #include <string.h>
                     45: #include <unistd.h>
                     46: #include <errno.h>
                     47: #ifdef HAVE_FCNTL_H
                     48: #include <fcntl.h>
                     49: #endif
                     50: 
                     51: #include "event.h"
                     52: #include "event-internal.h"
                     53: #include "evsignal.h"
                     54: #include "log.h"
                     55: 
                     56: /* due to limitations in the epoll interface, we need to keep track of
                     57:  * all file descriptors outself.
                     58:  */
                     59: struct evepoll {
                     60:        struct event *evread;
                     61:        struct event *evwrite;
                     62: };
                     63: 
                     64: struct epollop {
                     65:        struct evepoll *fds;
                     66:        int nfds;
                     67:        struct epoll_event *events;
                     68:        int nevents;
                     69:        int epfd;
                     70: };
                     71: 
                     72: static void *epoll_init        (struct event_base *);
                     73: static int epoll_add   (void *, struct event *);
                     74: static int epoll_del   (void *, struct event *);
                     75: static int epoll_dispatch      (struct event_base *, void *, struct timeval *);
                     76: static void epoll_dealloc      (struct event_base *, void *);
                     77: 
                     78: const struct eventop epollops = {
                     79:        "epoll",
                     80:        epoll_init,
                     81:        epoll_add,
                     82:        epoll_del,
                     83:        epoll_dispatch,
                     84:        epoll_dealloc,
                     85:        1 /* need reinit */
                     86: };
                     87: 
                     88: #ifdef HAVE_SETFD
                     89: #define FD_CLOSEONEXEC(x) do { \
                     90:         if (fcntl(x, F_SETFD, 1) == -1) \
                     91:                 event_warn("fcntl(%d, F_SETFD)", x); \
                     92: } while (0)
                     93: #else
                     94: #define FD_CLOSEONEXEC(x)
                     95: #endif
                     96: 
                     97: /* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
                     98:  * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
                     99:  * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
                    100:  * largest number of msec we can support here is 2147482.  Let's
                    101:  * round that down by 47 seconds.
                    102:  */
                    103: #define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
                    104: 
                    105: #define INITIAL_NFILES 32
                    106: #define INITIAL_NEVENTS 32
                    107: #define MAX_NEVENTS 4096
                    108: 
                    109: static void *
                    110: epoll_init(struct event_base *base)
                    111: {
                    112:        int epfd;
                    113:        struct epollop *epollop;
                    114: 
                    115:        /* Disable epollueue when this environment variable is set */
                    116:        if (evutil_getenv("EVENT_NOEPOLL"))
                    117:                return (NULL);
                    118: 
                    119:        /* Initalize the kernel queue */
                    120:        if ((epfd = epoll_create(32000)) == -1) {
                    121:                if (errno != ENOSYS)
                    122:                        event_warn("epoll_create");
                    123:                return (NULL);
                    124:        }
                    125: 
                    126:        FD_CLOSEONEXEC(epfd);
                    127: 
                    128:        if (!(epollop = calloc(1, sizeof(struct epollop))))
                    129:                return (NULL);
                    130: 
                    131:        epollop->epfd = epfd;
                    132: 
                    133:        /* Initalize fields */
                    134:        epollop->events = malloc(INITIAL_NEVENTS * sizeof(struct epoll_event));
                    135:        if (epollop->events == NULL) {
                    136:                free(epollop);
                    137:                return (NULL);
                    138:        }
                    139:        epollop->nevents = INITIAL_NEVENTS;
                    140: 
                    141:        epollop->fds = calloc(INITIAL_NFILES, sizeof(struct evepoll));
                    142:        if (epollop->fds == NULL) {
                    143:                free(epollop->events);
                    144:                free(epollop);
                    145:                return (NULL);
                    146:        }
                    147:        epollop->nfds = INITIAL_NFILES;
                    148: 
                    149:        evsignal_init(base);
                    150: 
                    151:        return (epollop);
                    152: }
                    153: 
                    154: static int
                    155: epoll_recalc(struct event_base *base, void *arg, int max)
                    156: {
                    157:        struct epollop *epollop = arg;
                    158: 
                    159:        if (max >= epollop->nfds) {
                    160:                struct evepoll *fds;
                    161:                int nfds;
                    162: 
                    163:                nfds = epollop->nfds;
                    164:                while (nfds <= max)
                    165:                        nfds <<= 1;
                    166: 
                    167:                fds = realloc(epollop->fds, nfds * sizeof(struct evepoll));
                    168:                if (fds == NULL) {
                    169:                        event_warn("realloc");
                    170:                        return (-1);
                    171:                }
                    172:                epollop->fds = fds;
                    173:                memset(fds + epollop->nfds, 0,
                    174:                    (nfds - epollop->nfds) * sizeof(struct evepoll));
                    175:                epollop->nfds = nfds;
                    176:        }
                    177: 
                    178:        return (0);
                    179: }
                    180: 
                    181: static int
                    182: epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
                    183: {
                    184:        struct epollop *epollop = arg;
                    185:        struct epoll_event *events = epollop->events;
                    186:        struct evepoll *evep;
                    187:        int i, res, timeout = -1;
                    188: 
                    189:        if (tv != NULL)
                    190:                timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
                    191: 
                    192:        if (timeout > MAX_EPOLL_TIMEOUT_MSEC) {
                    193:                /* Linux kernels can wait forever if the timeout is too big;
                    194:                 * see comment on MAX_EPOLL_TIMEOUT_MSEC. */
                    195:                timeout = MAX_EPOLL_TIMEOUT_MSEC;
                    196:        }
                    197: 
                    198:        res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
                    199: 
                    200:        if (res == -1) {
                    201:                if (errno != EINTR) {
                    202:                        event_warn("epoll_wait");
                    203:                        return (-1);
                    204:                }
                    205: 
                    206:                evsignal_process(base);
                    207:                return (0);
                    208:        } else if (base->sig.evsignal_caught) {
                    209:                evsignal_process(base);
                    210:        }
                    211: 
                    212:        event_debug(("%s: epoll_wait reports %d", __func__, res));
                    213: 
                    214:        for (i = 0; i < res; i++) {
                    215:                int what = events[i].events;
                    216:                struct event *evread = NULL, *evwrite = NULL;
                    217:                int fd = events[i].data.fd;
                    218: 
                    219:                if (fd < 0 || fd >= epollop->nfds)
                    220:                        continue;
                    221:                evep = &epollop->fds[fd];
                    222: 
                    223:                if (what & (EPOLLHUP|EPOLLERR)) {
                    224:                        evread = evep->evread;
                    225:                        evwrite = evep->evwrite;
                    226:                } else {
                    227:                        if (what & EPOLLIN) {
                    228:                                evread = evep->evread;
                    229:                        }
                    230: 
                    231:                        if (what & EPOLLOUT) {
                    232:                                evwrite = evep->evwrite;
                    233:                        }
                    234:                }
                    235: 
                    236:                if (!(evread||evwrite))
                    237:                        continue;
                    238: 
                    239:                if (evread != NULL)
                    240:                        event_active(evread, EV_READ, 1);
                    241:                if (evwrite != NULL)
                    242:                        event_active(evwrite, EV_WRITE, 1);
                    243:        }
                    244: 
                    245:        if (res == epollop->nevents && epollop->nevents < MAX_NEVENTS) {
                    246:                /* We used all of the event space this time.  We should
                    247:                   be ready for more events next time. */
                    248:                int new_nevents = epollop->nevents * 2;
                    249:                struct epoll_event *new_events;
                    250: 
                    251:                new_events = realloc(epollop->events,
                    252:                    new_nevents * sizeof(struct epoll_event));
                    253:                if (new_events) {
                    254:                        epollop->events = new_events;
                    255:                        epollop->nevents = new_nevents;
                    256:                }
                    257:        }
                    258: 
                    259:        return (0);
                    260: }
                    261: 
                    262: 
                    263: static int
                    264: epoll_add(void *arg, struct event *ev)
                    265: {
                    266:        struct epollop *epollop = arg;
                    267:        struct epoll_event epev = {0, {0}};
                    268:        struct evepoll *evep;
                    269:        int fd, op, events;
                    270: 
                    271:        if (ev->ev_events & EV_SIGNAL)
                    272:                return (evsignal_add(ev));
                    273: 
                    274:        fd = ev->ev_fd;
                    275:        if (fd >= epollop->nfds) {
                    276:                /* Extent the file descriptor array as necessary */
                    277:                if (epoll_recalc(ev->ev_base, epollop, fd) == -1)
                    278:                        return (-1);
                    279:        }
                    280:        evep = &epollop->fds[fd];
                    281:        op = EPOLL_CTL_ADD;
                    282:        events = 0;
                    283:        if (evep->evread != NULL) {
                    284:                events |= EPOLLIN;
                    285:                op = EPOLL_CTL_MOD;
                    286:        }
                    287:        if (evep->evwrite != NULL) {
                    288:                events |= EPOLLOUT;
                    289:                op = EPOLL_CTL_MOD;
                    290:        }
                    291: 
                    292:        if (ev->ev_events & EV_READ)
                    293:                events |= EPOLLIN;
                    294:        if (ev->ev_events & EV_WRITE)
                    295:                events |= EPOLLOUT;
                    296: 
                    297:        epev.data.fd = fd;
                    298:        epev.events = events;
                    299:        if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1)
                    300:                        return (-1);
                    301: 
                    302:        /* Update events responsible */
                    303:        if (ev->ev_events & EV_READ)
                    304:                evep->evread = ev;
                    305:        if (ev->ev_events & EV_WRITE)
                    306:                evep->evwrite = ev;
                    307: 
                    308:        return (0);
                    309: }
                    310: 
                    311: static int
                    312: epoll_del(void *arg, struct event *ev)
                    313: {
                    314:        struct epollop *epollop = arg;
                    315:        struct epoll_event epev = {0, {0}};
                    316:        struct evepoll *evep;
                    317:        int fd, events, op;
                    318:        int needwritedelete = 1, needreaddelete = 1;
                    319: 
                    320:        if (ev->ev_events & EV_SIGNAL)
                    321:                return (evsignal_del(ev));
                    322: 
                    323:        fd = ev->ev_fd;
                    324:        if (fd >= epollop->nfds)
                    325:                return (0);
                    326:        evep = &epollop->fds[fd];
                    327: 
                    328:        op = EPOLL_CTL_DEL;
                    329:        events = 0;
                    330: 
                    331:        if (ev->ev_events & EV_READ)
                    332:                events |= EPOLLIN;
                    333:        if (ev->ev_events & EV_WRITE)
                    334:                events |= EPOLLOUT;
                    335: 
                    336:        if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) {
                    337:                if ((events & EPOLLIN) && evep->evwrite != NULL) {
                    338:                        needwritedelete = 0;
                    339:                        events = EPOLLOUT;
                    340:                        op = EPOLL_CTL_MOD;
                    341:                } else if ((events & EPOLLOUT) && evep->evread != NULL) {
                    342:                        needreaddelete = 0;
                    343:                        events = EPOLLIN;
                    344:                        op = EPOLL_CTL_MOD;
                    345:                }
                    346:        }
                    347: 
                    348:        epev.events = events;
                    349:        epev.data.fd = fd;
                    350: 
                    351:        if (needreaddelete)
                    352:                evep->evread = NULL;
                    353:        if (needwritedelete)
                    354:                evep->evwrite = NULL;
                    355: 
                    356:        if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1)
                    357:                return (-1);
                    358: 
                    359:        return (0);
                    360: }
                    361: 
                    362: static void
                    363: epoll_dealloc(struct event_base *base, void *arg)
                    364: {
                    365:        struct epollop *epollop = arg;
                    366: 
                    367:        evsignal_dealloc(base);
                    368:        if (epollop->fds)
                    369:                free(epollop->fds);
                    370:        if (epollop->events)
                    371:                free(epollop->events);
                    372:        if (epollop->epfd >= 0)
                    373:                close(epollop->epfd);
                    374: 
                    375:        memset(epollop, 0, sizeof(struct epollop));
                    376:        free(epollop);
                    377: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>