Annotation of embedaddon/libevent/epoll.c, revision 1.1

1.1     ! misho       1: /*
        !             2:  * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
        !             3:  * All rights reserved.
        !             4:  *
        !             5:  * Redistribution and use in source and binary forms, with or without
        !             6:  * modification, are permitted provided that the following conditions
        !             7:  * are met:
        !             8:  * 1. Redistributions of source code must retain the above copyright
        !             9:  *    notice, this list of conditions and the following disclaimer.
        !            10:  * 2. Redistributions in binary form must reproduce the above copyright
        !            11:  *    notice, this list of conditions and the following disclaimer in the
        !            12:  *    documentation and/or other materials provided with the distribution.
        !            13:  * 3. The name of the author may not be used to endorse or promote products
        !            14:  *    derived from this software without specific prior written permission.
        !            15:  *
        !            16:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
        !            17:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
        !            18:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
        !            19:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
        !            20:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
        !            21:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        !            22:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        !            23:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        !            24:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
        !            25:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        !            26:  */
        !            27: #ifdef HAVE_CONFIG_H
        !            28: #include "config.h"
        !            29: #endif
        !            30: 
        !            31: #include <stdint.h>
        !            32: #include <sys/types.h>
        !            33: #include <sys/resource.h>
        !            34: #ifdef HAVE_SYS_TIME_H
        !            35: #include <sys/time.h>
        !            36: #else
        !            37: #include <sys/_libevent_time.h>
        !            38: #endif
        !            39: #include <sys/queue.h>
        !            40: #include <sys/epoll.h>
        !            41: #include <signal.h>
        !            42: #include <stdio.h>
        !            43: #include <stdlib.h>
        !            44: #include <string.h>
        !            45: #include <unistd.h>
        !            46: #include <errno.h>
        !            47: #ifdef HAVE_FCNTL_H
        !            48: #include <fcntl.h>
        !            49: #endif
        !            50: 
        !            51: #include "event.h"
        !            52: #include "event-internal.h"
        !            53: #include "evsignal.h"
        !            54: #include "log.h"
        !            55: 
/* Due to limitations in the epoll interface, we need to keep track of
 * all file descriptors ourselves.
 *
 * One entry exists per file descriptor (the table in struct epollop is
 * indexed by fd).  A NULL member means no event is registered for that
 * direction.
 */
struct evepoll {
	struct event *evread;	/* event registered with EV_READ, or NULL */
	struct event *evwrite;	/* event registered with EV_WRITE, or NULL */
};
        !            63: 
/* State of the epoll backend, allocated by epoll_init() and released by
 * epoll_dealloc().
 */
struct epollop {
	struct evepoll *fds;	/* per-descriptor table, indexed by fd */
	int nfds;		/* number of entries in fds */
	struct epoll_event *events;	/* result buffer passed to epoll_wait() */
	int nevents;		/* capacity of events, in entries */
	int epfd;		/* descriptor returned by epoll_create() */
};
        !            71: 
        !            72: static void *epoll_init        (struct event_base *);
        !            73: static int epoll_add   (void *, struct event *);
        !            74: static int epoll_del   (void *, struct event *);
        !            75: static int epoll_dispatch      (struct event_base *, void *, struct timeval *);
        !            76: static void epoll_dealloc      (struct event_base *, void *);
        !            77: 
/* Operations table for the epoll backend.  The initializer is positional:
 * a name string, then the init/add/del/dispatch/dealloc callbacks, then
 * the need-reinit flag.  The order must match struct eventop (declared
 * outside this file).
 */
const struct eventop epollops = {
	"epoll",
	epoll_init,
	epoll_add,
	epoll_del,
	epoll_dispatch,
	epoll_dealloc,
	1 /* need reinit */
};
        !            87: 
        !            88: #ifdef HAVE_SETFD
        !            89: #define FD_CLOSEONEXEC(x) do { \
        !            90:         if (fcntl(x, F_SETFD, 1) == -1) \
        !            91:                 event_warn("fcntl(%d, F_SETFD)", x); \
        !            92: } while (0)
        !            93: #else
        !            94: #define FD_CLOSEONEXEC(x)
        !            95: #endif
        !            96: 
/* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
 * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
 * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
 * largest number of msec we can support here is 2147482.  Let's
 * round that down by 47 seconds, which gives 35 minutes.
 */
#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)

/* Initial number of entries in the per-descriptor table (epollop->fds). */
#define INITIAL_NFILES 32
/* Initial capacity of the epoll_wait() result buffer (epollop->events). */
#define INITIAL_NEVENTS 32
/* The result buffer is no longer doubled once it reaches this capacity. */
#define MAX_NEVENTS 4096
        !           108: 
        !           109: static void *
        !           110: epoll_init(struct event_base *base)
        !           111: {
        !           112:        int epfd;
        !           113:        struct epollop *epollop;
        !           114: 
        !           115:        /* Disable epollueue when this environment variable is set */
        !           116:        if (evutil_getenv("EVENT_NOEPOLL"))
        !           117:                return (NULL);
        !           118: 
        !           119:        /* Initalize the kernel queue */
        !           120:        if ((epfd = epoll_create(32000)) == -1) {
        !           121:                if (errno != ENOSYS)
        !           122:                        event_warn("epoll_create");
        !           123:                return (NULL);
        !           124:        }
        !           125: 
        !           126:        FD_CLOSEONEXEC(epfd);
        !           127: 
        !           128:        if (!(epollop = calloc(1, sizeof(struct epollop))))
        !           129:                return (NULL);
        !           130: 
        !           131:        epollop->epfd = epfd;
        !           132: 
        !           133:        /* Initalize fields */
        !           134:        epollop->events = malloc(INITIAL_NEVENTS * sizeof(struct epoll_event));
        !           135:        if (epollop->events == NULL) {
        !           136:                free(epollop);
        !           137:                return (NULL);
        !           138:        }
        !           139:        epollop->nevents = INITIAL_NEVENTS;
        !           140: 
        !           141:        epollop->fds = calloc(INITIAL_NFILES, sizeof(struct evepoll));
        !           142:        if (epollop->fds == NULL) {
        !           143:                free(epollop->events);
        !           144:                free(epollop);
        !           145:                return (NULL);
        !           146:        }
        !           147:        epollop->nfds = INITIAL_NFILES;
        !           148: 
        !           149:        evsignal_init(base);
        !           150: 
        !           151:        return (epollop);
        !           152: }
        !           153: 
        !           154: static int
        !           155: epoll_recalc(struct event_base *base, void *arg, int max)
        !           156: {
        !           157:        struct epollop *epollop = arg;
        !           158: 
        !           159:        if (max >= epollop->nfds) {
        !           160:                struct evepoll *fds;
        !           161:                int nfds;
        !           162: 
        !           163:                nfds = epollop->nfds;
        !           164:                while (nfds <= max)
        !           165:                        nfds <<= 1;
        !           166: 
        !           167:                fds = realloc(epollop->fds, nfds * sizeof(struct evepoll));
        !           168:                if (fds == NULL) {
        !           169:                        event_warn("realloc");
        !           170:                        return (-1);
        !           171:                }
        !           172:                epollop->fds = fds;
        !           173:                memset(fds + epollop->nfds, 0,
        !           174:                    (nfds - epollop->nfds) * sizeof(struct evepoll));
        !           175:                epollop->nfds = nfds;
        !           176:        }
        !           177: 
        !           178:        return (0);
        !           179: }
        !           180: 
        !           181: static int
        !           182: epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
        !           183: {
        !           184:        struct epollop *epollop = arg;
        !           185:        struct epoll_event *events = epollop->events;
        !           186:        struct evepoll *evep;
        !           187:        int i, res, timeout = -1;
        !           188: 
        !           189:        if (tv != NULL)
        !           190:                timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
        !           191: 
        !           192:        if (timeout > MAX_EPOLL_TIMEOUT_MSEC) {
        !           193:                /* Linux kernels can wait forever if the timeout is too big;
        !           194:                 * see comment on MAX_EPOLL_TIMEOUT_MSEC. */
        !           195:                timeout = MAX_EPOLL_TIMEOUT_MSEC;
        !           196:        }
        !           197: 
        !           198:        res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
        !           199: 
        !           200:        if (res == -1) {
        !           201:                if (errno != EINTR) {
        !           202:                        event_warn("epoll_wait");
        !           203:                        return (-1);
        !           204:                }
        !           205: 
        !           206:                evsignal_process(base);
        !           207:                return (0);
        !           208:        } else if (base->sig.evsignal_caught) {
        !           209:                evsignal_process(base);
        !           210:        }
        !           211: 
        !           212:        event_debug(("%s: epoll_wait reports %d", __func__, res));
        !           213: 
        !           214:        for (i = 0; i < res; i++) {
        !           215:                int what = events[i].events;
        !           216:                struct event *evread = NULL, *evwrite = NULL;
        !           217:                int fd = events[i].data.fd;
        !           218: 
        !           219:                if (fd < 0 || fd >= epollop->nfds)
        !           220:                        continue;
        !           221:                evep = &epollop->fds[fd];
        !           222: 
        !           223:                if (what & (EPOLLHUP|EPOLLERR)) {
        !           224:                        evread = evep->evread;
        !           225:                        evwrite = evep->evwrite;
        !           226:                } else {
        !           227:                        if (what & EPOLLIN) {
        !           228:                                evread = evep->evread;
        !           229:                        }
        !           230: 
        !           231:                        if (what & EPOLLOUT) {
        !           232:                                evwrite = evep->evwrite;
        !           233:                        }
        !           234:                }
        !           235: 
        !           236:                if (!(evread||evwrite))
        !           237:                        continue;
        !           238: 
        !           239:                if (evread != NULL)
        !           240:                        event_active(evread, EV_READ, 1);
        !           241:                if (evwrite != NULL)
        !           242:                        event_active(evwrite, EV_WRITE, 1);
        !           243:        }
        !           244: 
        !           245:        if (res == epollop->nevents && epollop->nevents < MAX_NEVENTS) {
        !           246:                /* We used all of the event space this time.  We should
        !           247:                   be ready for more events next time. */
        !           248:                int new_nevents = epollop->nevents * 2;
        !           249:                struct epoll_event *new_events;
        !           250: 
        !           251:                new_events = realloc(epollop->events,
        !           252:                    new_nevents * sizeof(struct epoll_event));
        !           253:                if (new_events) {
        !           254:                        epollop->events = new_events;
        !           255:                        epollop->nevents = new_nevents;
        !           256:                }
        !           257:        }
        !           258: 
        !           259:        return (0);
        !           260: }
        !           261: 
        !           262: 
        !           263: static int
        !           264: epoll_add(void *arg, struct event *ev)
        !           265: {
        !           266:        struct epollop *epollop = arg;
        !           267:        struct epoll_event epev = {0, {0}};
        !           268:        struct evepoll *evep;
        !           269:        int fd, op, events;
        !           270: 
        !           271:        if (ev->ev_events & EV_SIGNAL)
        !           272:                return (evsignal_add(ev));
        !           273: 
        !           274:        fd = ev->ev_fd;
        !           275:        if (fd >= epollop->nfds) {
        !           276:                /* Extent the file descriptor array as necessary */
        !           277:                if (epoll_recalc(ev->ev_base, epollop, fd) == -1)
        !           278:                        return (-1);
        !           279:        }
        !           280:        evep = &epollop->fds[fd];
        !           281:        op = EPOLL_CTL_ADD;
        !           282:        events = 0;
        !           283:        if (evep->evread != NULL) {
        !           284:                events |= EPOLLIN;
        !           285:                op = EPOLL_CTL_MOD;
        !           286:        }
        !           287:        if (evep->evwrite != NULL) {
        !           288:                events |= EPOLLOUT;
        !           289:                op = EPOLL_CTL_MOD;
        !           290:        }
        !           291: 
        !           292:        if (ev->ev_events & EV_READ)
        !           293:                events |= EPOLLIN;
        !           294:        if (ev->ev_events & EV_WRITE)
        !           295:                events |= EPOLLOUT;
        !           296: 
        !           297:        epev.data.fd = fd;
        !           298:        epev.events = events;
        !           299:        if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1)
        !           300:                        return (-1);
        !           301: 
        !           302:        /* Update events responsible */
        !           303:        if (ev->ev_events & EV_READ)
        !           304:                evep->evread = ev;
        !           305:        if (ev->ev_events & EV_WRITE)
        !           306:                evep->evwrite = ev;
        !           307: 
        !           308:        return (0);
        !           309: }
        !           310: 
        !           311: static int
        !           312: epoll_del(void *arg, struct event *ev)
        !           313: {
        !           314:        struct epollop *epollop = arg;
        !           315:        struct epoll_event epev = {0, {0}};
        !           316:        struct evepoll *evep;
        !           317:        int fd, events, op;
        !           318:        int needwritedelete = 1, needreaddelete = 1;
        !           319: 
        !           320:        if (ev->ev_events & EV_SIGNAL)
        !           321:                return (evsignal_del(ev));
        !           322: 
        !           323:        fd = ev->ev_fd;
        !           324:        if (fd >= epollop->nfds)
        !           325:                return (0);
        !           326:        evep = &epollop->fds[fd];
        !           327: 
        !           328:        op = EPOLL_CTL_DEL;
        !           329:        events = 0;
        !           330: 
        !           331:        if (ev->ev_events & EV_READ)
        !           332:                events |= EPOLLIN;
        !           333:        if (ev->ev_events & EV_WRITE)
        !           334:                events |= EPOLLOUT;
        !           335: 
        !           336:        if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) {
        !           337:                if ((events & EPOLLIN) && evep->evwrite != NULL) {
        !           338:                        needwritedelete = 0;
        !           339:                        events = EPOLLOUT;
        !           340:                        op = EPOLL_CTL_MOD;
        !           341:                } else if ((events & EPOLLOUT) && evep->evread != NULL) {
        !           342:                        needreaddelete = 0;
        !           343:                        events = EPOLLIN;
        !           344:                        op = EPOLL_CTL_MOD;
        !           345:                }
        !           346:        }
        !           347: 
        !           348:        epev.events = events;
        !           349:        epev.data.fd = fd;
        !           350: 
        !           351:        if (needreaddelete)
        !           352:                evep->evread = NULL;
        !           353:        if (needwritedelete)
        !           354:                evep->evwrite = NULL;
        !           355: 
        !           356:        if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1)
        !           357:                return (-1);
        !           358: 
        !           359:        return (0);
        !           360: }
        !           361: 
        !           362: static void
        !           363: epoll_dealloc(struct event_base *base, void *arg)
        !           364: {
        !           365:        struct epollop *epollop = arg;
        !           366: 
        !           367:        evsignal_dealloc(base);
        !           368:        if (epollop->fds)
        !           369:                free(epollop->fds);
        !           370:        if (epollop->events)
        !           371:                free(epollop->events);
        !           372:        if (epollop->epfd >= 0)
        !           373:                close(epollop->epfd);
        !           374: 
        !           375:        memset(epollop, 0, sizeof(struct epollop));
        !           376:        free(epollop);
        !           377: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>