File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libevent / epoll.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:02:54 2012 UTC (12 years, 3 months ago) by misho
Branches: libevent, MAIN
CVS tags: v1_4_14bp0, v1_4_14b, HEAD
libevent

    1: /*
    2:  * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
    3:  * All rights reserved.
    4:  *
    5:  * Redistribution and use in source and binary forms, with or without
    6:  * modification, are permitted provided that the following conditions
    7:  * are met:
    8:  * 1. Redistributions of source code must retain the above copyright
    9:  *    notice, this list of conditions and the following disclaimer.
   10:  * 2. Redistributions in binary form must reproduce the above copyright
   11:  *    notice, this list of conditions and the following disclaimer in the
   12:  *    documentation and/or other materials provided with the distribution.
   13:  * 3. The name of the author may not be used to endorse or promote products
   14:  *    derived from this software without specific prior written permission.
   15:  *
   16:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   17:  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   18:  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   19:  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   20:  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   21:  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   22:  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   23:  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   24:  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   25:  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   26:  */
   27: #ifdef HAVE_CONFIG_H
   28: #include "config.h"
   29: #endif
   30: 
   31: #include <stdint.h>
   32: #include <sys/types.h>
   33: #include <sys/resource.h>
   34: #ifdef HAVE_SYS_TIME_H
   35: #include <sys/time.h>
   36: #else
   37: #include <sys/_libevent_time.h>
   38: #endif
   39: #include <sys/queue.h>
   40: #include <sys/epoll.h>
   41: #include <signal.h>
   42: #include <stdio.h>
   43: #include <stdlib.h>
   44: #include <string.h>
   45: #include <unistd.h>
   46: #include <errno.h>
   47: #ifdef HAVE_FCNTL_H
   48: #include <fcntl.h>
   49: #endif
   50: 
   51: #include "event.h"
   52: #include "event-internal.h"
   53: #include "evsignal.h"
   54: #include "log.h"
   55: 
   56: /* due to limitations in the epoll interface, we need to keep track of
   57:  * all file descriptors outself.
   58:  */
   59: struct evepoll {
   60: 	struct event *evread;
   61: 	struct event *evwrite;
   62: };
   63: 
   64: struct epollop {
   65: 	struct evepoll *fds;
   66: 	int nfds;
   67: 	struct epoll_event *events;
   68: 	int nevents;
   69: 	int epfd;
   70: };
   71: 
   72: static void *epoll_init	(struct event_base *);
   73: static int epoll_add	(void *, struct event *);
   74: static int epoll_del	(void *, struct event *);
   75: static int epoll_dispatch	(struct event_base *, void *, struct timeval *);
   76: static void epoll_dealloc	(struct event_base *, void *);
   77: 
/*
 * Operations table for the epoll backend: the string "epoll" followed by
 * the init, add, del, dispatch and dealloc entry points defined in this
 * file.  The initialisers are positional, so their order must match the
 * member order of struct eventop, which is declared outside this file.
 */
const struct eventop epollops = {
	"epoll",
	epoll_init,
	epoll_add,
	epoll_del,
	epoll_dispatch,
	epoll_dealloc,
	1 /* need reinit */
};
   87: 
   88: #ifdef HAVE_SETFD
   89: #define FD_CLOSEONEXEC(x) do { \
   90:         if (fcntl(x, F_SETFD, 1) == -1) \
   91:                 event_warn("fcntl(%d, F_SETFD)", x); \
   92: } while (0)
   93: #else
   94: #define FD_CLOSEONEXEC(x)
   95: #endif
   96: 
   97: /* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
   98:  * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
   99:  * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
  100:  * largest number of msec we can support here is 2147482.  Let's
  101:  * round that down by 47 seconds.
  102:  */
  103: #define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
  104: 
  105: #define INITIAL_NFILES 32
  106: #define INITIAL_NEVENTS 32
  107: #define MAX_NEVENTS 4096
  108: 
  109: static void *
  110: epoll_init(struct event_base *base)
  111: {
  112: 	int epfd;
  113: 	struct epollop *epollop;
  114: 
  115: 	/* Disable epollueue when this environment variable is set */
  116: 	if (evutil_getenv("EVENT_NOEPOLL"))
  117: 		return (NULL);
  118: 
  119: 	/* Initalize the kernel queue */
  120: 	if ((epfd = epoll_create(32000)) == -1) {
  121: 		if (errno != ENOSYS)
  122: 			event_warn("epoll_create");
  123: 		return (NULL);
  124: 	}
  125: 
  126: 	FD_CLOSEONEXEC(epfd);
  127: 
  128: 	if (!(epollop = calloc(1, sizeof(struct epollop))))
  129: 		return (NULL);
  130: 
  131: 	epollop->epfd = epfd;
  132: 
  133: 	/* Initalize fields */
  134: 	epollop->events = malloc(INITIAL_NEVENTS * sizeof(struct epoll_event));
  135: 	if (epollop->events == NULL) {
  136: 		free(epollop);
  137: 		return (NULL);
  138: 	}
  139: 	epollop->nevents = INITIAL_NEVENTS;
  140: 
  141: 	epollop->fds = calloc(INITIAL_NFILES, sizeof(struct evepoll));
  142: 	if (epollop->fds == NULL) {
  143: 		free(epollop->events);
  144: 		free(epollop);
  145: 		return (NULL);
  146: 	}
  147: 	epollop->nfds = INITIAL_NFILES;
  148: 
  149: 	evsignal_init(base);
  150: 
  151: 	return (epollop);
  152: }
  153: 
  154: static int
  155: epoll_recalc(struct event_base *base, void *arg, int max)
  156: {
  157: 	struct epollop *epollop = arg;
  158: 
  159: 	if (max >= epollop->nfds) {
  160: 		struct evepoll *fds;
  161: 		int nfds;
  162: 
  163: 		nfds = epollop->nfds;
  164: 		while (nfds <= max)
  165: 			nfds <<= 1;
  166: 
  167: 		fds = realloc(epollop->fds, nfds * sizeof(struct evepoll));
  168: 		if (fds == NULL) {
  169: 			event_warn("realloc");
  170: 			return (-1);
  171: 		}
  172: 		epollop->fds = fds;
  173: 		memset(fds + epollop->nfds, 0,
  174: 		    (nfds - epollop->nfds) * sizeof(struct evepoll));
  175: 		epollop->nfds = nfds;
  176: 	}
  177: 
  178: 	return (0);
  179: }
  180: 
  181: static int
  182: epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
  183: {
  184: 	struct epollop *epollop = arg;
  185: 	struct epoll_event *events = epollop->events;
  186: 	struct evepoll *evep;
  187: 	int i, res, timeout = -1;
  188: 
  189: 	if (tv != NULL)
  190: 		timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
  191: 
  192: 	if (timeout > MAX_EPOLL_TIMEOUT_MSEC) {
  193: 		/* Linux kernels can wait forever if the timeout is too big;
  194: 		 * see comment on MAX_EPOLL_TIMEOUT_MSEC. */
  195: 		timeout = MAX_EPOLL_TIMEOUT_MSEC;
  196: 	}
  197: 
  198: 	res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
  199: 
  200: 	if (res == -1) {
  201: 		if (errno != EINTR) {
  202: 			event_warn("epoll_wait");
  203: 			return (-1);
  204: 		}
  205: 
  206: 		evsignal_process(base);
  207: 		return (0);
  208: 	} else if (base->sig.evsignal_caught) {
  209: 		evsignal_process(base);
  210: 	}
  211: 
  212: 	event_debug(("%s: epoll_wait reports %d", __func__, res));
  213: 
  214: 	for (i = 0; i < res; i++) {
  215: 		int what = events[i].events;
  216: 		struct event *evread = NULL, *evwrite = NULL;
  217: 		int fd = events[i].data.fd;
  218: 
  219: 		if (fd < 0 || fd >= epollop->nfds)
  220: 			continue;
  221: 		evep = &epollop->fds[fd];
  222: 
  223: 		if (what & (EPOLLHUP|EPOLLERR)) {
  224: 			evread = evep->evread;
  225: 			evwrite = evep->evwrite;
  226: 		} else {
  227: 			if (what & EPOLLIN) {
  228: 				evread = evep->evread;
  229: 			}
  230: 
  231: 			if (what & EPOLLOUT) {
  232: 				evwrite = evep->evwrite;
  233: 			}
  234: 		}
  235: 
  236: 		if (!(evread||evwrite))
  237: 			continue;
  238: 
  239: 		if (evread != NULL)
  240: 			event_active(evread, EV_READ, 1);
  241: 		if (evwrite != NULL)
  242: 			event_active(evwrite, EV_WRITE, 1);
  243: 	}
  244: 
  245: 	if (res == epollop->nevents && epollop->nevents < MAX_NEVENTS) {
  246: 		/* We used all of the event space this time.  We should
  247: 		   be ready for more events next time. */
  248: 		int new_nevents = epollop->nevents * 2;
  249: 		struct epoll_event *new_events;
  250: 
  251: 		new_events = realloc(epollop->events,
  252: 		    new_nevents * sizeof(struct epoll_event));
  253: 		if (new_events) {
  254: 			epollop->events = new_events;
  255: 			epollop->nevents = new_nevents;
  256: 		}
  257: 	}
  258: 
  259: 	return (0);
  260: }
  261: 
  262: 
  263: static int
  264: epoll_add(void *arg, struct event *ev)
  265: {
  266: 	struct epollop *epollop = arg;
  267: 	struct epoll_event epev = {0, {0}};
  268: 	struct evepoll *evep;
  269: 	int fd, op, events;
  270: 
  271: 	if (ev->ev_events & EV_SIGNAL)
  272: 		return (evsignal_add(ev));
  273: 
  274: 	fd = ev->ev_fd;
  275: 	if (fd >= epollop->nfds) {
  276: 		/* Extent the file descriptor array as necessary */
  277: 		if (epoll_recalc(ev->ev_base, epollop, fd) == -1)
  278: 			return (-1);
  279: 	}
  280: 	evep = &epollop->fds[fd];
  281: 	op = EPOLL_CTL_ADD;
  282: 	events = 0;
  283: 	if (evep->evread != NULL) {
  284: 		events |= EPOLLIN;
  285: 		op = EPOLL_CTL_MOD;
  286: 	}
  287: 	if (evep->evwrite != NULL) {
  288: 		events |= EPOLLOUT;
  289: 		op = EPOLL_CTL_MOD;
  290: 	}
  291: 
  292: 	if (ev->ev_events & EV_READ)
  293: 		events |= EPOLLIN;
  294: 	if (ev->ev_events & EV_WRITE)
  295: 		events |= EPOLLOUT;
  296: 
  297: 	epev.data.fd = fd;
  298: 	epev.events = events;
  299: 	if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1)
  300: 			return (-1);
  301: 
  302: 	/* Update events responsible */
  303: 	if (ev->ev_events & EV_READ)
  304: 		evep->evread = ev;
  305: 	if (ev->ev_events & EV_WRITE)
  306: 		evep->evwrite = ev;
  307: 
  308: 	return (0);
  309: }
  310: 
  311: static int
  312: epoll_del(void *arg, struct event *ev)
  313: {
  314: 	struct epollop *epollop = arg;
  315: 	struct epoll_event epev = {0, {0}};
  316: 	struct evepoll *evep;
  317: 	int fd, events, op;
  318: 	int needwritedelete = 1, needreaddelete = 1;
  319: 
  320: 	if (ev->ev_events & EV_SIGNAL)
  321: 		return (evsignal_del(ev));
  322: 
  323: 	fd = ev->ev_fd;
  324: 	if (fd >= epollop->nfds)
  325: 		return (0);
  326: 	evep = &epollop->fds[fd];
  327: 
  328: 	op = EPOLL_CTL_DEL;
  329: 	events = 0;
  330: 
  331: 	if (ev->ev_events & EV_READ)
  332: 		events |= EPOLLIN;
  333: 	if (ev->ev_events & EV_WRITE)
  334: 		events |= EPOLLOUT;
  335: 
  336: 	if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) {
  337: 		if ((events & EPOLLIN) && evep->evwrite != NULL) {
  338: 			needwritedelete = 0;
  339: 			events = EPOLLOUT;
  340: 			op = EPOLL_CTL_MOD;
  341: 		} else if ((events & EPOLLOUT) && evep->evread != NULL) {
  342: 			needreaddelete = 0;
  343: 			events = EPOLLIN;
  344: 			op = EPOLL_CTL_MOD;
  345: 		}
  346: 	}
  347: 
  348: 	epev.events = events;
  349: 	epev.data.fd = fd;
  350: 
  351: 	if (needreaddelete)
  352: 		evep->evread = NULL;
  353: 	if (needwritedelete)
  354: 		evep->evwrite = NULL;
  355: 
  356: 	if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1)
  357: 		return (-1);
  358: 
  359: 	return (0);
  360: }
  361: 
  362: static void
  363: epoll_dealloc(struct event_base *base, void *arg)
  364: {
  365: 	struct epollop *epollop = arg;
  366: 
  367: 	evsignal_dealloc(base);
  368: 	if (epollop->fds)
  369: 		free(epollop->fds);
  370: 	if (epollop->events)
  371: 		free(epollop->events);
  372: 	if (epollop->epfd >= 0)
  373: 		close(epollop->epfd);
  374: 
  375: 	memset(epollop, 0, sizeof(struct epollop));
  376: 	free(epollop);
  377: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>