File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird / sysdep / unix / io.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Aug 22 12:33:54 2017 UTC (7 years ago) by misho
Branches: bird, MAIN
CVS tags: v1_6_3p0, v1_6_3, HEAD
bird 1.6.3

    1: /*
    2:  *	BIRD Internet Routing Daemon -- Unix I/O
    3:  *
    4:  *	(c) 1998--2004 Martin Mares <mj@ucw.cz>
    5:  *      (c) 2004       Ondrej Filip <feela@network.cz>
    6:  *
    7:  *	Can be freely distributed and used under the terms of the GNU GPL.
    8:  */
    9: 
   10: /* Unfortunately, some glibc versions hide parts of RFC 3542 API
   11:    if _GNU_SOURCE is not defined. */
   12: #ifndef _GNU_SOURCE
   13: #define _GNU_SOURCE
   14: #endif
   15: 
   16: #include <stdio.h>
   17: #include <stdlib.h>
   18: #include <time.h>
   19: #include <sys/time.h>
   20: #include <sys/types.h>
   21: #include <sys/socket.h>
   22: #include <sys/uio.h>
   23: #include <sys/un.h>
   24: #include <poll.h>
   25: #include <unistd.h>
   26: #include <fcntl.h>
   27: #include <errno.h>
   28: #include <net/if.h>
   29: #include <netinet/in.h>
   30: #include <netinet/tcp.h>
   31: #include <netinet/udp.h>
   32: #include <netinet/icmp6.h>
   33: 
   34: #include "nest/bird.h"
   35: #include "lib/lists.h"
   36: #include "lib/resource.h"
   37: #include "lib/timer.h"
   38: #include "lib/socket.h"
   39: #include "lib/event.h"
   40: #include "lib/string.h"
   41: #include "nest/iface.h"
   42: 
   43: #include "lib/unix.h"
   44: #include "lib/sysio.h"
   45: 
   46: /* Maximum number of calls of tx handler for one socket in one
   47:  * poll iteration. Should be small enough to not monopolize CPU by
   48:  * one protocol instance.
   49:  */
   50: #define MAX_STEPS 4
   51: 
   52: /* Maximum number of calls of rx handler for all sockets in one poll
   53:    iteration. RX callbacks are often much more costly so we limit
   54:    this to get small latencies */
   55: #define MAX_RX_STEPS 4
   56: 
   57: /*
   58:  *	Tracked Files
   59:  */
   60: 
       /* Resource wrapper that ties a stdio stream to a resource (see tracked_fopen()). */
   61: struct rfile {
   62:   resource r;	/* resource header; rf_free()/rf_dump() cast a resource * to struct rfile * */
   63:   FILE *f;	/* the wrapped stream, closed by rf_free() */
   64: };
   65: 
   66: static void
   67: rf_free(resource *r)
   68: {
   69:   struct rfile *a = (struct rfile *) r;
   70: 
   71:   fclose(a->f);
   72: }
   73: 
   74: static void
   75: rf_dump(resource *r)
   76: {
   77:   struct rfile *a = (struct rfile *) r;
   78: 
   79:   debug("(FILE *%p)\n", a->f);
   80: }
   81: 
       /* Resource class descriptor for tracked stdio streams (struct rfile). */
   82: static struct resclass rf_class = {
   83:   "FILE",			/* class name */
   84:   sizeof(struct rfile),	/* size of one instance */
   85:   rf_free,			/* closes the stream */
   86:   rf_dump,			/* prints the FILE pointer */
   87:   NULL,			/* remaining two hooks are not provided; their */
   88:   NULL			/* meaning is defined by struct resclass (not shown here) */
   89: };
   90: 
   91: void *
   92: tracked_fopen(pool *p, char *name, char *mode)
   93: {
   94:   FILE *f = fopen(name, mode);
   95: 
   96:   if (f)
   97:     {
   98:       struct rfile *r = ralloc(p, &rf_class);
   99:       r->f = f;
  100:     }
  101:   return f;
  102: }
  103: 
  104: /**
  105:  * DOC: Timers
  106:  *
  107:  * Timers are resources which represent a wish of a module to call
  108:  * a function at the specified time. The platform dependent code
  109:  * doesn't guarantee exact timing, only that a timer function
  110:  * won't be called before the requested time.
  111:  *
  112:  * In BIRD, time is represented by values of the &bird_clock_t type
  113:  * which are integral numbers interpreted as a relative number of seconds since
  114:  * some fixed time point in past. The current time can be read
  115:  * from variable @now with reasonable accuracy and is monotonic. There is also
  116:  * a current 'absolute' time in variable @now_real reported by OS.
  117:  *
  118:  * Each timer is described by a &timer structure containing a pointer
  119:  * to the handler function (@hook), data private to this function (@data),
  120:  * time the function should be called at (@expires, 0 for inactive timers),
  121:  * for the other fields see |timer.h|.
  122:  */
  123: 
  124: #define NEAR_TIMER_LIMIT 4
  125: 
  126: static list near_timers, far_timers;
  127: static bird_clock_t first_far_timer = TIME_INFINITY;
  128: 
  129: /* now must be different from 0, because 0 is a special value in timer->expires */
  130: bird_clock_t now = 1, now_real, boot_time;
  131: 
  132: static void
  133: update_times_plain(void)
  134: {
  135:   bird_clock_t new_time = time(NULL);
  136:   int delta = new_time - now_real;
  137: 
  138:   if ((delta >= 0) && (delta < 60))
  139:     now += delta;
  140:   else if (now_real != 0)
  141:    log(L_WARN "Time jump, delta %d s", delta);
  142: 
  143:   now_real = new_time;
  144: }
  145: 
  146: static void
  147: update_times_gettime(void)
  148: {
  149:   struct timespec ts;
  150:   int rv;
  151: 
  152:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
  153:   if (rv != 0)
  154:     die("clock_gettime: %m");
  155: 
  156:   if (ts.tv_sec != now) {
  157:     if (ts.tv_sec < now)
  158:       log(L_ERR "Monotonic timer is broken");
  159: 
  160:     now = ts.tv_sec;
  161:     now_real = time(NULL);
  162:   }
  163: }
  164: 
  165: static int clock_monotonic_available;
  166: 
  167: static inline void
  168: update_times(void)
  169: {
  170:   if (clock_monotonic_available)
  171:     update_times_gettime();
  172:   else
  173:     update_times_plain();
  174: }
  175: 
  176: static inline void
  177: init_times(void)
  178: {
  179:  struct timespec ts;
  180:  clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
  181:  if (!clock_monotonic_available)
  182:    log(L_WARN "Monotonic timer is missing");
  183: }
  184: 
  185: 
  186: static void
  187: tm_free(resource *r)
  188: {
  189:   timer *t = (timer *) r;
  190: 
  191:   tm_stop(t);
  192: }
  193: 
  194: static void
  195: tm_dump(resource *r)
  196: {
  197:   timer *t = (timer *) r;
  198: 
  199:   debug("(code %p, data %p, ", t->hook, t->data);
  200:   if (t->randomize)
  201:     debug("rand %d, ", t->randomize);
  202:   if (t->recurrent)
  203:     debug("recur %d, ", t->recurrent);
  204:   if (t->expires)
  205:     debug("expires in %d sec)\n", t->expires - now);
  206:   else
  207:     debug("inactive)\n");
  208: }
  209: 
       /* Resource class descriptor for timers. */
  210: static struct resclass tm_class = {
  211:   "Timer",			/* class name */
  212:   sizeof(timer),		/* size of one instance */
  213:   tm_free,			/* stops the timer */
  214:   tm_dump,			/* prints the timer state */
  215:   NULL,			/* remaining two hooks are not provided; their */
  216:   NULL			/* meaning is defined by struct resclass (not shown here) */
  217: };
  218: 
  219: /**
  220:  * tm_new - create a timer
  221:  * @p: pool
  222:  *
  223:  * This function creates a new timer resource and returns
  224:  * a pointer to it. To use the timer, you need to fill in
  225:  * the structure fields and call tm_start() to start timing.
  226:  */
  227: timer *
  228: tm_new(pool *p)
  229: {
  230:   timer *t = ralloc(p, &tm_class);
  231:   return t;
  232: }
  233: 
  234: static inline void
  235: tm_insert_near(timer *t)
  236: {
  237:   node *n = HEAD(near_timers);
  238: 
  239:   while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
  240:     n = n->next;
  241:   insert_node(&t->n, n->prev);
  242: }
  243: 
  244: /**
  245:  * tm_start - start a timer
  246:  * @t: timer
  247:  * @after: number of seconds the timer should be run after
  248:  *
  249:  * This function schedules the hook function of the timer to
  250:  * be called after @after seconds. If the timer has been already
  251:  * started, it's @expire time is replaced by the new value.
  252:  *
  253:  * You can have set the @randomize field of @t, the timeout
  254:  * will be increased by a random number of seconds chosen
  255:  * uniformly from range 0 .. @randomize.
  256:  *
  257:  * You can call tm_start() from the handler function of the timer
  258:  * to request another run of the timer. Also, you can set the @recurrent
  259:  * field to have the timer re-added automatically with the same timeout.
  260:  */
  261: void
  262: tm_start(timer *t, unsigned after)
  263: {
  264:   bird_clock_t when;
  265: 
  266:   if (t->randomize)
  267:     after += random() % (t->randomize + 1);
  268:   when = now + after;
  269:   if (t->expires == when)
  270:     return;
  271:   if (t->expires)
  272:     rem_node(&t->n);
  273:   t->expires = when;
  274:   if (after <= NEAR_TIMER_LIMIT)
  275:     tm_insert_near(t);
  276:   else
  277:     {
  278:       if (!first_far_timer || first_far_timer > when)
  279: 	first_far_timer = when;
  280:       add_tail(&far_timers, &t->n);
  281:     }
  282: }
  283: 
  284: /**
  285:  * tm_stop - stop a timer
  286:  * @t: timer
  287:  *
  288:  * This function stops a timer. If the timer is already stopped,
  289:  * nothing happens.
  290:  */
  291: void
  292: tm_stop(timer *t)
  293: {
  294:   if (t->expires)
  295:     {
  296:       rem_node(&t->n);
  297:       t->expires = 0;
  298:     }
  299: }
  300: 
  301: static void
  302: tm_dump_them(char *name, list *l)
  303: {
  304:   node *n;
  305:   timer *t;
  306: 
  307:   debug("%s timers:\n", name);
  308:   WALK_LIST(n, *l)
  309:     {
  310:       t = SKIP_BACK(timer, n, n);
  311:       debug("%p ", t);
  312:       tm_dump(&t->r);
  313:     }
  314:   debug("\n");
  315: }
  316: 
  317: void
  318: tm_dump_all(void)
  319: {
  320:   tm_dump_them("Near", &near_timers);
  321:   tm_dump_them("Far", &far_timers);
  322: }
  323: 
  324: static inline time_t
  325: tm_first_shot(void)
  326: {
  327:   time_t x = first_far_timer;
  328: 
  329:   if (!EMPTY_LIST(near_timers))
  330:     {
  331:       timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
  332:       if (t->expires < x)
  333: 	x = t->expires;
  334:     }
  335:   return x;
  336: }
  337: 
  338: void io_log_event(void *hook, void *data);
  339: 
  340: static void
  341: tm_shot(void)
  342: {
  343:   timer *t;
  344:   node *n, *m;
  345: 
  346:   if (first_far_timer <= now)
  347:     {
  348:       bird_clock_t limit = now + NEAR_TIMER_LIMIT;
  349:       first_far_timer = TIME_INFINITY;
  350:       n = HEAD(far_timers);
  351:       while (m = n->next)
  352: 	{
  353: 	  t = SKIP_BACK(timer, n, n);
  354: 	  if (t->expires <= limit)
  355: 	    {
  356: 	      rem_node(n);
  357: 	      tm_insert_near(t);
  358: 	    }
  359: 	  else if (t->expires < first_far_timer)
  360: 	    first_far_timer = t->expires;
  361: 	  n = m;
  362: 	}
  363:     }
  364:   while ((n = HEAD(near_timers)) -> next)
  365:     {
  366:       int delay;
  367:       t = SKIP_BACK(timer, n, n);
  368:       if (t->expires > now)
  369: 	break;
  370:       rem_node(n);
  371:       delay = t->expires - now;
  372:       t->expires = 0;
  373:       if (t->recurrent)
  374: 	{
  375: 	  int i = t->recurrent - delay;
  376: 	  if (i < 0)
  377: 	    i = 0;
  378: 	  tm_start(t, i);
  379: 	}
  380:       io_log_event(t->hook, t->data);
  381:       t->hook(t);
  382:     }
  383: }
  384: 
  385: /**
  386:  * tm_parse_datetime - parse a date and time
  387:  * @x: datetime string
  388:  *
  389:  * tm_parse_datetime() takes a textual representation of
  390:  * a date and time (dd-mm-yyyy hh:mm:ss)
  391:  * and converts it to the corresponding value of type &bird_clock_t.
  392:  */
  393: bird_clock_t
  394: tm_parse_datetime(char *x)
  395: {
  396:   struct tm tm;
  397:   int n;
  398:   time_t t;
  399: 
  400:   if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
  401:     return tm_parse_date(x);
  402:   tm.tm_mon--;
  403:   tm.tm_year -= 1900;
  404:   t = mktime(&tm);
  405:   if (t == (time_t) -1)
  406:     return 0;
  407:   return t;
  408: }
  409: /**
  410:  * tm_parse_date - parse a date
  411:  * @x: date string
  412:  *
  413:  * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
  414:  * and converts it to the corresponding value of type &bird_clock_t.
  415:  */
  416: bird_clock_t
  417: tm_parse_date(char *x)
  418: {
  419:   struct tm tm;
  420:   int n;
  421:   time_t t;
  422: 
  423:   if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
  424:     return 0;
  425:   tm.tm_mon--;
  426:   tm.tm_year -= 1900;
  427:   tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
  428:   t = mktime(&tm);
  429:   if (t == (time_t) -1)
  430:     return 0;
  431:   return t;
  432: }
  433: 
  434: static void
  435: tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
  436: {
  437:   static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  438: 				   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
  439: 
  440:   if (delta < 20*3600)
  441:     bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
  442:   else if (delta < 360*86400)
  443:     bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
  444:   else
  445:     bsprintf(x, "%d", tm->tm_year+1900);
  446: }
  447: 
  448: #include "conf/conf.h"
  449: 
  450: /**
  451:  * tm_format_datetime - convert date and time to textual representation
  452:  * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
  453:  * @fmt_spec: specification of resulting textual representation of the time
  454:  * @t: time
  455:  *
  456:  * This function formats the given relative time value @t to a textual
  457:  * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
  458:  */
  459: void
  460: tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
  461: {
  462:   const char *fmt_used;
  463:   struct tm *tm;
  464:   bird_clock_t delta = now - t;
  465:   t = now_real - delta;
  466:   tm = localtime(&t);
  467: 
  468:   if (fmt_spec->fmt1 == NULL)
  469:     return tm_format_reltime(x, tm, delta);
  470: 
  471:   if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
  472:     fmt_used = fmt_spec->fmt1;
  473:   else
  474:     fmt_used = fmt_spec->fmt2;
  475: 
  476:   int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
  477:   if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
  478:     strcpy(x, "<too-long>");
  479: }
  480: 
  481: 
  482: /**
  483:  * DOC: Sockets
  484:  *
  485:  * Socket resources represent network connections. Their data structure (&socket)
  486:  * contains a lot of fields defining the exact type of the socket, the local and
  487:  * remote addresses and ports, pointers to socket buffers and finally pointers to
  488:  * hook functions to be called when new data have arrived to the receive buffer
  489:  * (@rx_hook), when the contents of the transmit buffer have been transmitted
  490:  * (@tx_hook) and when an error or connection close occurs (@err_hook).
  491:  *
  492:  * Freeing of sockets from inside socket hooks is perfectly safe.
  493:  */
  494: 
  495: #ifndef SOL_IP
  496: #define SOL_IP IPPROTO_IP
  497: #endif
  498: 
  499: #ifndef SOL_IPV6
  500: #define SOL_IPV6 IPPROTO_IPV6
  501: #endif
  502: 
  503: #ifndef SOL_ICMPV6
  504: #define SOL_ICMPV6 IPPROTO_ICMPV6
  505: #endif
  506: 
  507: 
  508: /*
  509:  *	Sockaddr helper functions
  510:  */
  511: 
  512: static inline int UNUSED sockaddr_length(int af)
  513: { return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
  514: 
  515: static inline void
  516: sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
  517: {
  518:   memset(sa, 0, sizeof(struct sockaddr_in));
  519: #ifdef HAVE_SIN_LEN
  520:   sa->sin_len = sizeof(struct sockaddr_in);
  521: #endif
  522:   sa->sin_family = AF_INET;
  523:   sa->sin_port = htons(port);
  524:   sa->sin_addr = ipa_to_in4(a);
  525: }
  526: 
  527: static inline void
  528: sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
  529: {
  530:   memset(sa, 0, sizeof(struct sockaddr_in6));
  531: #ifdef SIN6_LEN
  532:   sa->sin6_len = sizeof(struct sockaddr_in6);
  533: #endif
  534:   sa->sin6_family = AF_INET6;
  535:   sa->sin6_port = htons(port);
  536:   sa->sin6_flowinfo = 0;
  537:   sa->sin6_addr = ipa_to_in6(a);
  538: 
  539:   if (ifa && ipa_is_link_local(a))
  540:     sa->sin6_scope_id = ifa->index;
  541: }
  542: 
  543: void
  544: sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
  545: {
  546:   if (af == AF_INET)
  547:     sockaddr_fill4((struct sockaddr_in *) sa, a, port);
  548:   else if (af == AF_INET6)
  549:     sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
  550:   else
  551:     bug("Unknown AF");
  552: }
  553: 
  554: static inline void
  555: sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
  556: {
  557:   *port = ntohs(sa->sin_port);
  558:   *a = ipa_from_in4(sa->sin_addr);
  559: }
  560: 
  561: static inline void
  562: sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
  563: {
  564:   *port = ntohs(sa->sin6_port);
  565:   *a = ipa_from_in6(sa->sin6_addr);
  566: 
  567:   if (ifa && ipa_is_link_local(*a))
  568:     *ifa = if_find_by_index(sa->sin6_scope_id);
  569: }
  570: 
  571: int
  572: sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
  573: {
  574:   if (sa->sa.sa_family != af)
  575:     goto fail;
  576: 
  577:   if (af == AF_INET)
  578:     sockaddr_read4((struct sockaddr_in *) sa, a, port);
  579:   else if (af == AF_INET6)
  580:     sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
  581:   else
  582:     goto fail;
  583: 
  584:   return 0;
  585: 
  586:  fail:
  587:   *a = IPA_NONE;
  588:   *port = 0;
  589:   return -1;
  590: }
  591: 
  592: 
  593: /*
  594:  *	IPv6 multicast syscalls
  595:  */
  596: 
  597: /* Fortunately standardized in RFC 3493 */
  598: 
  599: #define INIT_MREQ6(maddr,ifa) \
  600:   { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
  601: 
  602: static inline int
  603: sk_setup_multicast6(sock *s)
  604: {
  605:   int index = s->iface->index;
  606:   int ttl = s->ttl;
  607:   int n = 0;
  608: 
  609:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
  610:     ERR("IPV6_MULTICAST_IF");
  611: 
  612:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
  613:     ERR("IPV6_MULTICAST_HOPS");
  614: 
  615:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
  616:     ERR("IPV6_MULTICAST_LOOP");
  617: 
  618:   return 0;
  619: }
  620: 
  621: static inline int
  622: sk_join_group6(sock *s, ip_addr maddr)
  623: {
  624:   struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
  625: 
  626:   if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
  627:     ERR("IPV6_JOIN_GROUP");
  628: 
  629:   return 0;
  630: }
  631: 
  632: static inline int
  633: sk_leave_group6(sock *s, ip_addr maddr)
  634: {
  635:   struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
  636: 
  637:   if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
  638:     ERR("IPV6_LEAVE_GROUP");
  639: 
  640:   return 0;
  641: }
  642: 
  643: 
  644: /*
  645:  *	IPv6 packet control messages
  646:  */
  647: 
  648: /* Also standardized, in RFC 3542 */
  649: 
  650: /*
  651:  * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
  652:  * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
  653:  * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
  654:  * RFC and we use IPV6_PKTINFO.
  655:  */
  656: #ifndef IPV6_RECVPKTINFO
  657: #define IPV6_RECVPKTINFO IPV6_PKTINFO
  658: #endif
  659: /*
  660:  * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
  661:  */
  662: #ifndef IPV6_RECVHOPLIMIT
  663: #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
  664: #endif
  665: 
  666: 
  667: #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
  668: #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
  669: 
  670: static inline int
  671: sk_request_cmsg6_pktinfo(sock *s)
  672: {
  673:   int y = 1;
  674: 
  675:   if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
  676:     ERR("IPV6_RECVPKTINFO");
  677: 
  678:   return 0;
  679: }
  680: 
  681: static inline int
  682: sk_request_cmsg6_ttl(sock *s)
  683: {
  684:   int y = 1;
  685: 
  686:   if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
  687:     ERR("IPV6_RECVHOPLIMIT");
  688: 
  689:   return 0;
  690: }
  691: 
  692: static inline void
  693: sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
  694: {
  695:   if (cm->cmsg_type == IPV6_PKTINFO)
  696:   {
  697:     struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
  698:     s->laddr = ipa_from_in6(pi->ipi6_addr);
  699:     s->lifindex = pi->ipi6_ifindex;
  700:   }
  701: }
  702: 
  703: static inline void
  704: sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
  705: {
  706:   if (cm->cmsg_type == IPV6_HOPLIMIT)
  707:     s->rcv_ttl = * (int *) CMSG_DATA(cm);
  708: }
  709: 
  710: static inline void
  711: sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
  712: {
  713:   struct cmsghdr *cm;
  714:   struct in6_pktinfo *pi;
  715:   int controllen = 0;
  716: 
  717:   msg->msg_control = cbuf;
  718:   msg->msg_controllen = cbuflen;
  719: 
  720:   cm = CMSG_FIRSTHDR(msg);
  721:   cm->cmsg_level = SOL_IPV6;
  722:   cm->cmsg_type = IPV6_PKTINFO;
  723:   cm->cmsg_len = CMSG_LEN(sizeof(*pi));
  724:   controllen += CMSG_SPACE(sizeof(*pi));
  725: 
  726:   pi = (struct in6_pktinfo *) CMSG_DATA(cm);
  727:   pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
  728:   pi->ipi6_addr = ipa_to_in6(s->saddr);
  729: 
  730:   msg->msg_controllen = controllen;
  731: }
  732: 
  733: 
  734: /*
  735:  *	Miscellaneous socket syscalls
  736:  */
  737: 
  738: static inline int
  739: sk_set_ttl4(sock *s, int ttl)
  740: {
  741:   if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
  742:     ERR("IP_TTL");
  743: 
  744:   return 0;
  745: }
  746: 
  747: static inline int
  748: sk_set_ttl6(sock *s, int ttl)
  749: {
  750:   if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
  751:     ERR("IPV6_UNICAST_HOPS");
  752: 
  753:   return 0;
  754: }
  755: 
  756: static inline int
  757: sk_set_tos4(sock *s, int tos)
  758: {
  759:   if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
  760:     ERR("IP_TOS");
  761: 
  762:   return 0;
  763: }
  764: 
  765: static inline int
  766: sk_set_tos6(sock *s, int tos)
  767: {
  768:   if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
  769:     ERR("IPV6_TCLASS");
  770: 
  771:   return 0;
  772: }
  773: 
  774: static inline int
  775: sk_set_high_port(sock *s UNUSED)
  776: {
  777:   /* Port range setting is optional, ignore it if not supported */
  778: 
  779: #ifdef IP_PORTRANGE
  780:   if (sk_is_ipv4(s))
  781:   {
  782:     int range = IP_PORTRANGE_HIGH;
  783:     if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
  784:       ERR("IP_PORTRANGE");
  785:   }
  786: #endif
  787: 
  788: #ifdef IPV6_PORTRANGE
  789:   if (sk_is_ipv6(s))
  790:   {
  791:     int range = IPV6_PORTRANGE_HIGH;
  792:     if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
  793:       ERR("IPV6_PORTRANGE");
  794:   }
  795: #endif
  796: 
  797:   return 0;
  798: }
  799: 
  800: static inline byte *
  801: sk_skip_ip_header(byte *pkt, int *len)
  802: {
  803:   if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
  804:     return NULL;
  805: 
  806:   int hlen = (*pkt & 0x0f) * 4;
  807:   if ((hlen < 20) || (hlen > *len))
  808:     return NULL;
  809: 
  810:   *len -= hlen;
  811:   return pkt + hlen;
  812: }
  813: 
  814: byte *
  815: sk_rx_buffer(sock *s, int *len)
  816: {
  817:   if (sk_is_ipv4(s) && (s->type == SK_IP))
  818:     return sk_skip_ip_header(s->rbuf, len);
  819:   else
  820:     return s->rbuf;
  821: }
  822: 
  823: 
  824: /*
  825:  *	Public socket functions
  826:  */
  827: 
  828: /**
  829:  * sk_setup_multicast - enable multicast for given socket
  830:  * @s: socket
  831:  *
  832:  * Prepare transmission of multicast packets for given datagram socket.
  833:  * The socket must have defined @iface.
  834:  *
  835:  * Result: 0 for success, -1 for an error.
  836:  */
  837: 
  838: int
  839: sk_setup_multicast(sock *s)
  840: {
  841:   ASSERT(s->iface);
  842: 
  843:   if (sk_is_ipv4(s))
  844:     return sk_setup_multicast4(s);
  845:   else
  846:     return sk_setup_multicast6(s);
  847: }
  848: 
  849: /**
  850:  * sk_join_group - join multicast group for given socket
  851:  * @s: socket
  852:  * @maddr: multicast address
  853:  *
  854:  * Join multicast group for given datagram socket and associated interface.
  855:  * The socket must have defined @iface.
  856:  *
  857:  * Result: 0 for success, -1 for an error.
  858:  */
  859: 
  860: int
  861: sk_join_group(sock *s, ip_addr maddr)
  862: {
  863:   if (sk_is_ipv4(s))
  864:     return sk_join_group4(s, maddr);
  865:   else
  866:     return sk_join_group6(s, maddr);
  867: }
  868: 
  869: /**
  870:  * sk_leave_group - leave multicast group for given socket
  871:  * @s: socket
  872:  * @maddr: multicast address
  873:  *
  874:  * Leave multicast group for given datagram socket and associated interface.
  875:  * The socket must have defined @iface.
  876:  *
  877:  * Result: 0 for success, -1 for an error.
  878:  */
  879: 
  880: int
  881: sk_leave_group(sock *s, ip_addr maddr)
  882: {
  883:   if (sk_is_ipv4(s))
  884:     return sk_leave_group4(s, maddr);
  885:   else
  886:     return sk_leave_group6(s, maddr);
  887: }
  888: 
  889: /**
  890:  * sk_setup_broadcast - enable broadcast for given socket
  891:  * @s: socket
  892:  *
  893:  * Allow reception and transmission of broadcast packets for given datagram
  894:  * socket. The socket must have defined @iface. For transmission, packets should
  895:  * be send to @brd address of @iface.
  896:  *
  897:  * Result: 0 for success, -1 for an error.
  898:  */
  899: 
  900: int
  901: sk_setup_broadcast(sock *s)
  902: {
  903:   int y = 1;
  904: 
  905:   if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
  906:     ERR("SO_BROADCAST");
  907: 
  908:   return 0;
  909: }
  910: 
  911: /**
  912:  * sk_set_ttl - set transmit TTL for given socket
  913:  * @s: socket
  914:  * @ttl: TTL value
  915:  *
  916:  * Set TTL for already opened connections when TTL was not set before. Useful
  917:  * for accepted connections when different ones should have different TTL.
  918:  *
  919:  * Result: 0 for success, -1 for an error.
  920:  */
  921: 
  922: int
  923: sk_set_ttl(sock *s, int ttl)
  924: {
  925:   s->ttl = ttl;
  926: 
  927:   if (sk_is_ipv4(s))
  928:     return sk_set_ttl4(s, ttl);
  929:   else
  930:     return sk_set_ttl6(s, ttl);
  931: }
  932: 
  933: /**
  934:  * sk_set_min_ttl - set minimal accepted TTL for given socket
  935:  * @s: socket
  936:  * @ttl: TTL value
  937:  *
  938:  * Set minimal accepted TTL for given socket. Can be used for TTL security.
  939:  * implementations.
  940:  *
  941:  * Result: 0 for success, -1 for an error.
  942:  */
  943: 
  944: int
  945: sk_set_min_ttl(sock *s, int ttl)
  946: {
  947:   if (sk_is_ipv4(s))
  948:     return sk_set_min_ttl4(s, ttl);
  949:   else
  950:     return sk_set_min_ttl6(s, ttl);
  951: }
  952: 
  953: #if 0
  954: /**
  955:  * sk_set_md5_auth - add / remove MD5 security association for given socket
  956:  * @s: socket
  957:  * @local: IP address of local side
  958:  * @remote: IP address of remote side
  959:  * @ifa: Interface for link-local IP address
  960:  * @passwd: Password used for MD5 authentication
  961:  * @setkey: Update also system SA/SP database
  962:  *
  963:  * In TCP MD5 handling code in kernel, there is a set of security associations
  964:  * used for choosing password and other authentication parameters according to
  965:  * the local and remote address. This function is useful for listening socket,
  966:  * for active sockets it may be enough to set s->password field.
  967:  *
  968:  * When called with passwd != NULL, the new pair is added,
  969:  * When called with passwd == NULL, the existing pair is removed.
  970:  *
  971:  * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
  972:  * stored in global SA/SP database (but the behavior also must be enabled on
  973:  * per-socket basis). In case of multiple sockets to the same neighbor, the
  974:  * socket-specific state must be configured for each socket while global state
  975:  * just once per src-dst pair. The @setkey argument controls whether the global
  976:  * state (SA/SP database) is also updated.
  977:  *
  978:  * Result: 0 for success, -1 for an error.
  979:  */
  980: 
  981: int
  982: sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
  983: { DUMMY; }
  984: #endif
  985: 
  986: /**
  987:  * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
  988:  * @s: socket
  989:  * @offset: offset
  990:  *
  991:  * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
  992:  * kernel will automatically fill it for outgoing packets and check it for
  993:  * incoming packets. Should not be used on ICMPv6 sockets, where the position is
  994:  * known to the kernel.
  995:  *
  996:  * Result: 0 for success, -1 for an error.
  997:  */
  998: 
  999: int
 1000: sk_set_ipv6_checksum(sock *s, int offset)
 1001: {
 1002:   if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
 1003:     ERR("IPV6_CHECKSUM");
 1004: 
 1005:   return 0;
 1006: }
 1007: 
 1008: int
 1009: sk_set_icmp6_filter(sock *s, int p1, int p2)
 1010: {
 1011:   /* a bit of lame interface, but it is here only for Radv */
 1012:   struct icmp6_filter f;
 1013: 
 1014:   ICMP6_FILTER_SETBLOCKALL(&f);
 1015:   ICMP6_FILTER_SETPASS(p1, &f);
 1016:   ICMP6_FILTER_SETPASS(p2, &f);
 1017: 
 1018:   if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
 1019:     ERR("ICMP6_FILTER");
 1020: 
 1021:   return 0;
 1022: }
 1023: 
 1024: void
 1025: sk_log_error(sock *s, const char *p)
 1026: {
 1027:   log(L_ERR "%s: Socket error: %s%#m", p, s->err);
 1028: }
 1029: 
 1030: 
 1031: /*
 1032:  *	Actual struct birdsock code
 1033:  */
 1034: 
 1035: static list sock_list;
 1036: static struct birdsock *current_sock;
 1037: static struct birdsock *stored_sock;
 1038: 
 1039: static inline sock *
 1040: sk_next(sock *s)
 1041: {
 1042:   if (!s->n.next->next)
 1043:     return NULL;
 1044:   else
 1045:     return SKIP_BACK(sock, n, s->n.next);
 1046: }
 1047: 
 1048: static void
 1049: sk_alloc_bufs(sock *s)
 1050: {
 1051:   if (!s->rbuf && s->rbsize)
 1052:     s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
 1053:   s->rpos = s->rbuf;
 1054:   if (!s->tbuf && s->tbsize)
 1055:     s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
 1056:   s->tpos = s->ttx = s->tbuf;
 1057: }
 1058: 
 1059: static void
 1060: sk_free_bufs(sock *s)
 1061: {
 1062:   if (s->rbuf_alloc)
 1063:   {
 1064:     xfree(s->rbuf_alloc);
 1065:     s->rbuf = s->rbuf_alloc = NULL;
 1066:   }
 1067:   if (s->tbuf_alloc)
 1068:   {
 1069:     xfree(s->tbuf_alloc);
 1070:     s->tbuf = s->tbuf_alloc = NULL;
 1071:   }
 1072: }
 1073: 
 1074: static void
 1075: sk_free(resource *r)
 1076: {
 1077:   sock *s = (sock *) r;
 1078: 
 1079:   sk_free_bufs(s);
 1080:   if (s->fd >= 0)
 1081:   {
 1082:     close(s->fd);
 1083: 
 1084:     /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
 1085:     if (s->flags & SKF_THREAD)
 1086:       return;
 1087: 
 1088:     if (s == current_sock)
 1089:       current_sock = sk_next(s);
 1090:     if (s == stored_sock)
 1091:       stored_sock = sk_next(s);
 1092:     rem_node(&s->n);
 1093:   }
 1094: }
 1095: 
 1096: void
 1097: sk_set_rbsize(sock *s, uint val)
 1098: {
 1099:   ASSERT(s->rbuf_alloc == s->rbuf);
 1100: 
 1101:   if (s->rbsize == val)
 1102:     return;
 1103: 
 1104:   s->rbsize = val;
 1105:   xfree(s->rbuf_alloc);
 1106:   s->rbuf_alloc = xmalloc(val);
 1107:   s->rpos = s->rbuf = s->rbuf_alloc;
 1108: }
 1109: 
 1110: void
 1111: sk_set_tbsize(sock *s, uint val)
 1112: {
 1113:   ASSERT(s->tbuf_alloc == s->tbuf);
 1114: 
 1115:   if (s->tbsize == val)
 1116:     return;
 1117: 
 1118:   byte *old_tbuf = s->tbuf;
 1119: 
 1120:   s->tbsize = val;
 1121:   s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
 1122:   s->tpos = s->tbuf + (s->tpos - old_tbuf);
 1123:   s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
 1124: }
 1125: 
 1126: void
 1127: sk_set_tbuf(sock *s, void *tbuf)
 1128: {
 1129:   s->tbuf = tbuf ?: s->tbuf_alloc;
 1130:   s->ttx = s->tpos = s->tbuf;
 1131: }
 1132: 
/*
 * sk_reallocate - drop the socket-owned buffers and set buffers up again
 *
 * Frees rbuf_alloc/tbuf_alloc (if any) and then allocates buffers according
 * to the current rbsize/tbsize. Buffered data is lost and the read/write
 * positions are rewound by sk_alloc_bufs().
 */
void
sk_reallocate(sock *s)
{
  sk_free_bufs(s);
  sk_alloc_bufs(s);
}
 1139: 
 1140: static void
 1141: sk_dump(resource *r)
 1142: {
 1143:   sock *s = (sock *) r;
 1144:   static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
 1145: 
 1146:   debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
 1147: 	sk_type_names[s->type],
 1148: 	s->data,
 1149: 	s->saddr,
 1150: 	s->sport,
 1151: 	s->daddr,
 1152: 	s->dport,
 1153: 	s->tos,
 1154: 	s->ttl,
 1155: 	s->iface ? s->iface->name : "none");
 1156: }
 1157: 
/*
 * Resource class of sockets; sock_new() passes it to ralloc().
 * The initializer is positional -- field names live in the declaration of
 * struct resclass, which is not visible here.
 */
static struct resclass sk_class = {
  "Socket",		/* class name */
  sizeof(sock),		/* size of one instance */
  sk_free,		/* releases buffers, closes the fd and unlinks the socket */
  sk_dump,		/* prints a one-line description via debug() */
  NULL,			/* remaining hooks are not provided for sockets */
  NULL
};
 1166: 
 1167: /**
 1168:  * sk_new - create a socket
 1169:  * @p: pool
 1170:  *
 1171:  * This function creates a new socket resource. If you want to use it,
 1172:  * you need to fill in all the required fields of the structure and
 1173:  * call sk_open() to do the actual opening of the socket.
 1174:  *
 1175:  * The real function name is sock_new(), sk_new() is a macro wrapper
 1176:  * to avoid collision with OpenSSL.
 1177:  */
 1178: sock *
 1179: sock_new(pool *p)
 1180: {
 1181:   sock *s = ralloc(p, &sk_class);
 1182:   s->pool = p;
 1183:   // s->saddr = s->daddr = IPA_NONE;
 1184:   s->tos = s->priority = s->ttl = -1;
 1185:   s->fd = -1;
 1186:   return s;
 1187: }
 1188: 
 1189: static int
 1190: sk_setup(sock *s)
 1191: {
 1192:   int y = 1;
 1193:   int fd = s->fd;
 1194: 
 1195:   if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
 1196:     ERR("O_NONBLOCK");
 1197: 
 1198:   if (!s->af)
 1199:     return 0;
 1200: 
 1201:   if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
 1202:     s->flags |= SKF_PKTINFO;
 1203: 
 1204: #ifdef CONFIG_USE_HDRINCL
 1205:   if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
 1206:   {
 1207:     s->flags &= ~SKF_PKTINFO;
 1208:     s->flags |= SKF_HDRINCL;
 1209:     if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
 1210:       ERR("IP_HDRINCL");
 1211:   }
 1212: #endif
 1213: 
 1214:   if (s->iface)
 1215:   {
 1216: #ifdef SO_BINDTODEVICE
 1217:     struct ifreq ifr = {};
 1218:     strcpy(ifr.ifr_name, s->iface->name);
 1219:     if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
 1220:       ERR("SO_BINDTODEVICE");
 1221: #endif
 1222: 
 1223: #ifdef CONFIG_UNIX_DONTROUTE
 1224:     if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
 1225:       ERR("SO_DONTROUTE");
 1226: #endif
 1227:   }
 1228: 
 1229:   if (s->priority >= 0)
 1230:     if (sk_set_priority(s, s->priority) < 0)
 1231:       return -1;
 1232: 
 1233:   if (sk_is_ipv4(s))
 1234:   {
 1235:     if (s->flags & SKF_LADDR_RX)
 1236:       if (sk_request_cmsg4_pktinfo(s) < 0)
 1237: 	return -1;
 1238: 
 1239:     if (s->flags & SKF_TTL_RX)
 1240:       if (sk_request_cmsg4_ttl(s) < 0)
 1241: 	return -1;
 1242: 
 1243:     if ((s->type == SK_UDP) || (s->type == SK_IP))
 1244:       if (sk_disable_mtu_disc4(s) < 0)
 1245: 	return -1;
 1246: 
 1247:     if (s->ttl >= 0)
 1248:       if (sk_set_ttl4(s, s->ttl) < 0)
 1249: 	return -1;
 1250: 
 1251:     if (s->tos >= 0)
 1252:       if (sk_set_tos4(s, s->tos) < 0)
 1253: 	return -1;
 1254:   }
 1255: 
 1256:   if (sk_is_ipv6(s))
 1257:   {
 1258:     if (s->flags & SKF_V6ONLY)
 1259:       if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
 1260: 	ERR("IPV6_V6ONLY");
 1261: 
 1262:     if (s->flags & SKF_LADDR_RX)
 1263:       if (sk_request_cmsg6_pktinfo(s) < 0)
 1264: 	return -1;
 1265: 
 1266:     if (s->flags & SKF_TTL_RX)
 1267:       if (sk_request_cmsg6_ttl(s) < 0)
 1268: 	return -1;
 1269: 
 1270:     if ((s->type == SK_UDP) || (s->type == SK_IP))
 1271:       if (sk_disable_mtu_disc6(s) < 0)
 1272: 	return -1;
 1273: 
 1274:     if (s->ttl >= 0)
 1275:       if (sk_set_ttl6(s, s->ttl) < 0)
 1276: 	return -1;
 1277: 
 1278:     if (s->tos >= 0)
 1279:       if (sk_set_tos6(s, s->tos) < 0)
 1280: 	return -1;
 1281:   }
 1282: 
 1283:   return 0;
 1284: }
 1285: 
/* Register @s with the main loop by appending it to sock_list. */
static void
sk_insert(sock *s)
{
  add_tail(&sock_list, &s->n);
}
 1291: 
 1292: static void
 1293: sk_tcp_connected(sock *s)
 1294: {
 1295:   sockaddr sa;
 1296:   int sa_len = sizeof(sa);
 1297: 
 1298:   if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
 1299:       (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
 1300:     log(L_WARN "SOCK: Cannot get local IP address for TCP>");
 1301: 
 1302:   s->type = SK_TCP;
 1303:   sk_alloc_bufs(s);
 1304:   s->tx_hook(s);
 1305: }
 1306: 
 1307: static int
 1308: sk_passive_connected(sock *s, int type)
 1309: {
 1310:   sockaddr loc_sa, rem_sa;
 1311:   int loc_sa_len = sizeof(loc_sa);
 1312:   int rem_sa_len = sizeof(rem_sa);
 1313: 
 1314:   int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
 1315:   if (fd < 0)
 1316:   {
 1317:     if ((errno != EINTR) && (errno != EAGAIN))
 1318:       s->err_hook(s, errno);
 1319:     return 0;
 1320:   }
 1321: 
 1322:   sock *t = sk_new(s->pool);
 1323:   t->type = type;
 1324:   t->fd = fd;
 1325:   t->af = s->af;
 1326:   t->ttl = s->ttl;
 1327:   t->tos = s->tos;
 1328:   t->rbsize = s->rbsize;
 1329:   t->tbsize = s->tbsize;
 1330: 
 1331:   if (type == SK_TCP)
 1332:   {
 1333:     if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
 1334: 	(sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
 1335:       log(L_WARN "SOCK: Cannot get local IP address for TCP<");
 1336: 
 1337:     if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
 1338:       log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
 1339:   }
 1340: 
 1341:   if (sk_setup(t) < 0)
 1342:   {
 1343:     /* FIXME: Call err_hook instead ? */
 1344:     log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
 1345: 
 1346:     /* FIXME: handle it better in rfree() */
 1347:     close(t->fd);
 1348:     t->fd = -1;
 1349:     rfree(t);
 1350:     return 1;
 1351:   }
 1352: 
 1353:   sk_insert(t);
 1354:   sk_alloc_bufs(t);
 1355:   s->rx_hook(t, 0);
 1356:   return 1;
 1357: }
 1358: 
/**
 * sk_open - open a socket
 * @s: socket
 *
 * This function takes a socket resource created by sk_new() and
 * initialized by the user and binds a corresponding network connection
 * to it.
 *
 * The steps are: create the descriptor according to @s->type (SK_MAGIC
 * reuses the descriptor already stored in @s->fd), apply options with
 * sk_setup(), optionally bind, optionally set up MD5 authentication,
 * then connect or listen for the TCP types. Unless SKF_THREAD is set, the
 * socket is finally registered with the main loop.
 *
 * On any failure after the descriptor exists, it is closed and @s->fd is
 * reset to -1.
 *
 * Result: 0 for success, -1 for an error.
 */
int
sk_open(sock *s)
{
  int af = BIRD_AF;
  int fd = -1;
  int do_bind = 0;
  int bind_port = 0;
  ip_addr bind_addr = IPA_NONE;
  sockaddr sa;

  /* Phase 1: create the descriptor and decide what (if anything) to bind to */
  switch (s->type)
  {
  case SK_TCP_ACTIVE:
    /* Makes io_loop() poll for POLLOUT, so sk_write() can complete the connect */
    s->ttx = "";			/* Force s->ttx != s->tpos */
    /* Fall thru */
  case SK_TCP_PASSIVE:
    fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
    bind_port = s->sport;
    bind_addr = s->saddr;
    do_bind = bind_port || ipa_nonzero(bind_addr);
    break;

  case SK_UDP:
    fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
    bind_port = s->sport;
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
    do_bind = 1;
    break;

  case SK_IP:
    /* For raw sockets, dport carries the IP protocol number */
    fd = socket(af, SOCK_RAW, s->dport);
    bind_port = 0;
    bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
    do_bind = ipa_nonzero(bind_addr);
    break;

  case SK_MAGIC:
    /* Caller-supplied descriptor; af 0 makes sk_setup() stop after O_NONBLOCK */
    af = 0;
    fd = s->fd;
    break;

  default:
    bug("sk_open() called for invalid sock type %d", s->type);
  }

  if (fd < 0)
    ERR("socket");

  s->af = af;
  s->fd = fd;

  /* Phase 2: socket options */
  if (sk_setup(s) < 0)
    goto err;

  /* Phase 3: bind to the local address/port chosen above */
  if (do_bind)
  {
    if (bind_port)
    {
      int y = 1;

      if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
	ERR2("SO_REUSEADDR");

#ifdef CONFIG_NO_IFACE_BIND
      /* Workaround missing ability to bind to an iface */
      if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
      {
	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
	  ERR2("SO_REUSEPORT");
      }
#endif
    }
    else
      /* No fixed port: a failure to select the high port range is only logged */
      if (s->flags & SKF_HIGH_PORT)
	if (sk_set_high_port(s) < 0)
	  log(L_WARN "Socket error: %s%#m", s->err);

    sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
    if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
      ERR2("bind");
  }

  /* Phase 4: TCP MD5 authentication, when a password is configured */
  if (s->password)
    if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
      goto err;

  /* Phase 5: type-specific activation */
  switch (s->type)
  {
  case SK_TCP_ACTIVE:
    sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
    if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
      sk_tcp_connected(s);
    /* The errno values listed here are not treated as an open failure */
    else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
	     errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
      ERR2("connect");
    break;

  case SK_TCP_PASSIVE:
    if (listen(fd, 8) < 0)
      ERR2("listen");
    break;

  case SK_MAGIC:
    break;

  default:
    /* Datagram and raw sockets get their buffers right away */
    sk_alloc_bufs(s);
  }

  if (!(s->flags & SKF_THREAD))
    sk_insert(s);
  return 0;

err:
  close(fd);
  s->fd = -1;
  return -1;
}
 1487: 
 1488: int
 1489: sk_open_unix(sock *s, char *name)
 1490: {
 1491:   struct sockaddr_un sa;
 1492:   int fd;
 1493: 
 1494:   /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
 1495: 
 1496:   fd = socket(AF_UNIX, SOCK_STREAM, 0);
 1497:   if (fd < 0)
 1498:     return -1;
 1499: 
 1500:   if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
 1501:     return -1;
 1502: 
 1503:   /* Path length checked in test_old_bird() */
 1504:   sa.sun_family = AF_UNIX;
 1505:   strcpy(sa.sun_path, name);
 1506: 
 1507:   if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
 1508:     return -1;
 1509: 
 1510:   if (listen(fd, 8) < 0)
 1511:     return -1;
 1512: 
 1513:   s->fd = fd;
 1514:   sk_insert(s);
 1515:   return 0;
 1516: }
 1517: 
 1518: 
/*
 * Sizes of the on-stack control message buffers.
 * RX (sk_recvmsg) must hold a PKTINFO plus a TTL message, TX (sk_sendmsg)
 * only a PKTINFO message; each takes the larger of the IPv4 and IPv6 needs.
 */
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
			  CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
 1522: 
 1523: static void
 1524: sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
 1525: {
 1526:   if (sk_is_ipv4(s))
 1527:     sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
 1528:   else
 1529:     sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
 1530: }
 1531: 
 1532: static void
 1533: sk_process_cmsgs(sock *s, struct msghdr *msg)
 1534: {
 1535:   struct cmsghdr *cm;
 1536: 
 1537:   s->laddr = IPA_NONE;
 1538:   s->lifindex = 0;
 1539:   s->rcv_ttl = -1;
 1540: 
 1541:   for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
 1542:   {
 1543:     if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
 1544:     {
 1545:       sk_process_cmsg4_pktinfo(s, cm);
 1546:       sk_process_cmsg4_ttl(s, cm);
 1547:     }
 1548: 
 1549:     if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
 1550:     {
 1551:       sk_process_cmsg6_pktinfo(s, cm);
 1552:       sk_process_cmsg6_ttl(s, cm);
 1553:     }
 1554:   }
 1555: }
 1556: 
 1557: 
 1558: static inline int
 1559: sk_sendmsg(sock *s)
 1560: {
 1561:   struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
 1562:   byte cmsg_buf[CMSG_TX_SPACE];
 1563:   sockaddr dst;
 1564: 
 1565:   sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
 1566: 
 1567:   struct msghdr msg = {
 1568:     .msg_name = &dst.sa,
 1569:     .msg_namelen = SA_LEN(dst),
 1570:     .msg_iov = &iov,
 1571:     .msg_iovlen = 1
 1572:   };
 1573: 
 1574: #ifdef CONFIG_USE_HDRINCL
 1575:   byte hdr[20];
 1576:   struct iovec iov2[2] = { {hdr, 20}, iov };
 1577: 
 1578:   if (s->flags & SKF_HDRINCL)
 1579:   {
 1580:     sk_prepare_ip_header(s, hdr, iov.iov_len);
 1581:     msg.msg_iov = iov2;
 1582:     msg.msg_iovlen = 2;
 1583:   }
 1584: #endif
 1585: 
 1586:   if (s->flags & SKF_PKTINFO)
 1587:     sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
 1588: 
 1589:   return sendmsg(s->fd, &msg, 0);
 1590: }
 1591: 
 1592: static inline int
 1593: sk_recvmsg(sock *s)
 1594: {
 1595:   struct iovec iov = {s->rbuf, s->rbsize};
 1596:   byte cmsg_buf[CMSG_RX_SPACE];
 1597:   sockaddr src;
 1598: 
 1599:   struct msghdr msg = {
 1600:     .msg_name = &src.sa,
 1601:     .msg_namelen = sizeof(src), // XXXX ??
 1602:     .msg_iov = &iov,
 1603:     .msg_iovlen = 1,
 1604:     .msg_control = cmsg_buf,
 1605:     .msg_controllen = sizeof(cmsg_buf),
 1606:     .msg_flags = 0
 1607:   };
 1608: 
 1609:   int rv = recvmsg(s->fd, &msg, 0);
 1610:   if (rv < 0)
 1611:     return rv;
 1612: 
 1613:   //ifdef IPV4
 1614:   //  if (cf_type == SK_IP)
 1615:   //    rv = ipv4_skip_header(pbuf, rv);
 1616:   //endif
 1617: 
 1618:   sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
 1619:   sk_process_cmsgs(s, &msg);
 1620: 
 1621:   if (msg.msg_flags & MSG_TRUNC)
 1622:     s->flags |= SKF_TRUNCATED;
 1623:   else
 1624:     s->flags &= ~SKF_TRUNCATED;
 1625: 
 1626:   return rv;
 1627: }
 1628: 
 1629: 
 1630: static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
 1631: 
 1632: static int
 1633: sk_maybe_write(sock *s)
 1634: {
 1635:   int e;
 1636: 
 1637:   switch (s->type)
 1638:   {
 1639:   case SK_TCP:
 1640:   case SK_MAGIC:
 1641:   case SK_UNIX:
 1642:     while (s->ttx != s->tpos)
 1643:     {
 1644:       e = write(s->fd, s->ttx, s->tpos - s->ttx);
 1645: 
 1646:       if (e < 0)
 1647:       {
 1648: 	if (errno != EINTR && errno != EAGAIN)
 1649: 	{
 1650: 	  reset_tx_buffer(s);
 1651: 	  /* EPIPE is just a connection close notification during TX */
 1652: 	  s->err_hook(s, (errno != EPIPE) ? errno : 0);
 1653: 	  return -1;
 1654: 	}
 1655: 	return 0;
 1656:       }
 1657:       s->ttx += e;
 1658:     }
 1659:     reset_tx_buffer(s);
 1660:     return 1;
 1661: 
 1662:   case SK_UDP:
 1663:   case SK_IP:
 1664:     {
 1665:       if (s->tbuf == s->tpos)
 1666: 	return 1;
 1667: 
 1668:       e = sk_sendmsg(s);
 1669: 
 1670:       if (e < 0)
 1671:       {
 1672: 	if (errno != EINTR && errno != EAGAIN)
 1673: 	{
 1674: 	  reset_tx_buffer(s);
 1675: 	  s->err_hook(s, errno);
 1676: 	  return -1;
 1677: 	}
 1678: 
 1679: 	if (!s->tx_hook)
 1680: 	  reset_tx_buffer(s);
 1681: 	return 0;
 1682:       }
 1683:       reset_tx_buffer(s);
 1684:       return 1;
 1685:     }
 1686:   default:
 1687:     bug("sk_maybe_write: unknown socket type %d", s->type);
 1688:   }
 1689: }
 1690: 
 1691: int
 1692: sk_rx_ready(sock *s)
 1693: {
 1694:   int rv;
 1695:   struct pollfd pfd = { .fd = s->fd };
 1696:   pfd.events |= POLLIN;
 1697: 
 1698:  redo:
 1699:   rv = poll(&pfd, 1, 0);
 1700: 
 1701:   if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
 1702:     goto redo;
 1703: 
 1704:   return rv;
 1705: }
 1706: 
 1707: /**
 1708:  * sk_send - send data to a socket
 1709:  * @s: socket
 1710:  * @len: number of bytes to send
 1711:  *
 1712:  * This function sends @len bytes of data prepared in the
 1713:  * transmit buffer of the socket @s to the network connection.
 1714:  * If the packet can be sent immediately, it does so and returns
 1715:  * 1, else it queues the packet for later processing, returns 0
 1716:  * and calls the @tx_hook of the socket when the tranmission
 1717:  * takes place.
 1718:  */
 1719: int
 1720: sk_send(sock *s, unsigned len)
 1721: {
 1722:   s->ttx = s->tbuf;
 1723:   s->tpos = s->tbuf + len;
 1724:   return sk_maybe_write(s);
 1725: }
 1726: 
 1727: /**
 1728:  * sk_send_to - send data to a specific destination
 1729:  * @s: socket
 1730:  * @len: number of bytes to send
 1731:  * @addr: IP address to send the packet to
 1732:  * @port: port to send the packet to
 1733:  *
 1734:  * This is a sk_send() replacement for connection-less packet sockets
 1735:  * which allows destination of the packet to be chosen dynamically.
 1736:  * Raw IP sockets should use 0 for @port.
 1737:  */
 1738: int
 1739: sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
 1740: {
 1741:   s->daddr = addr;
 1742:   if (port)
 1743:     s->dport = port;
 1744: 
 1745:   s->ttx = s->tbuf;
 1746:   s->tpos = s->tbuf + len;
 1747:   return sk_maybe_write(s);
 1748: }
 1749: 
 1750: /*
 1751: int
 1752: sk_send_full(sock *s, unsigned len, struct iface *ifa,
 1753: 	     ip_addr saddr, ip_addr daddr, unsigned dport)
 1754: {
 1755:   s->iface = ifa;
 1756:   s->saddr = saddr;
 1757:   s->daddr = daddr;
 1758:   s->dport = dport;
 1759:   s->ttx = s->tbuf;
 1760:   s->tpos = s->tbuf + len;
 1761:   return sk_maybe_write(s);
 1762: }
 1763: */
 1764: 
 1765:  /* sk_read() and sk_write() are called from BFD's event loop */
 1766: 
 1767: int
 1768: sk_read(sock *s, int revents)
 1769: {
 1770:   switch (s->type)
 1771:   {
 1772:   case SK_TCP_PASSIVE:
 1773:     return sk_passive_connected(s, SK_TCP);
 1774: 
 1775:   case SK_UNIX_PASSIVE:
 1776:     return sk_passive_connected(s, SK_UNIX);
 1777: 
 1778:   case SK_TCP:
 1779:   case SK_UNIX:
 1780:     {
 1781:       int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
 1782: 
 1783:       if (c < 0)
 1784:       {
 1785: 	if (errno != EINTR && errno != EAGAIN)
 1786: 	  s->err_hook(s, errno);
 1787: 	else if (errno == EAGAIN && !(revents & POLLIN))
 1788: 	{
 1789: 	  log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
 1790: 	  s->err_hook(s, 0);
 1791: 	}
 1792:       }
 1793:       else if (!c)
 1794: 	s->err_hook(s, 0);
 1795:       else
 1796:       {
 1797: 	s->rpos += c;
 1798: 	if (s->rx_hook(s, s->rpos - s->rbuf))
 1799: 	{
 1800: 	  /* We need to be careful since the socket could have been deleted by the hook */
 1801: 	  if (current_sock == s)
 1802: 	    s->rpos = s->rbuf;
 1803: 	}
 1804: 	return 1;
 1805:       }
 1806:       return 0;
 1807:     }
 1808: 
 1809:   case SK_MAGIC:
 1810:     return s->rx_hook(s, 0);
 1811: 
 1812:   default:
 1813:     {
 1814:       int e = sk_recvmsg(s);
 1815: 
 1816:       if (e < 0)
 1817:       {
 1818: 	if (errno != EINTR && errno != EAGAIN)
 1819: 	  s->err_hook(s, errno);
 1820: 	return 0;
 1821:       }
 1822: 
 1823:       s->rpos = s->rbuf + e;
 1824:       s->rx_hook(s, e);
 1825:       return 1;
 1826:     }
 1827:   }
 1828: }
 1829: 
 1830: int
 1831: sk_write(sock *s)
 1832: {
 1833:   switch (s->type)
 1834:   {
 1835:   case SK_TCP_ACTIVE:
 1836:     {
 1837:       sockaddr sa;
 1838:       sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
 1839: 
 1840:       if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
 1841: 	sk_tcp_connected(s);
 1842:       else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
 1843: 	s->err_hook(s, errno);
 1844:       return 0;
 1845:     }
 1846: 
 1847:   default:
 1848:     if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
 1849:     {
 1850:       if (s->tx_hook)
 1851: 	s->tx_hook(s);
 1852:       return 1;
 1853:     }
 1854:     return 0;
 1855:   }
 1856: }
 1857: 
 1858: void
 1859: sk_err(sock *s, int revents)
 1860: {
 1861:   int se = 0, sse = sizeof(se);
 1862:   if ((s->type != SK_MAGIC) && (revents & POLLERR))
 1863:     if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
 1864:     {
 1865:       log(L_ERR "IO: Socket error: SO_ERROR: %m");
 1866:       se = 0;
 1867:     }
 1868: 
 1869:   s->err_hook(s, se);
 1870: }
 1871: 
 1872: void
 1873: sk_dump_all(void)
 1874: {
 1875:   node *n;
 1876:   sock *s;
 1877: 
 1878:   debug("Open sockets:\n");
 1879:   WALK_LIST(n, sock_list)
 1880:   {
 1881:     s = SKIP_BACK(sock, n, n);
 1882:     debug("%p ", s);
 1883:     sk_dump(&s->r);
 1884:   }
 1885:   debug("\n");
 1886: }
 1887: 
 1888: 
 1889: /*
 1890:  *	Internal event log and watchdog
 1891:  */
 1892: 
/* Number of slots in the circular event log */
#define EVENT_LOG_LENGTH 32

/* One record of the internal event log, written by io_log_event() */
struct event_log_entry
{
  void *hook;			/* Address of the handler about to be run */
  void *data;			/* Data pointer belonging to that handler */
  btime timestamp;		/* Value of last_time when the entry was stored */
  btime duration;		/* 0 until io_update_time() closes the entry */
};

/* The log itself; event_log_pos is the slot the next entry goes to */
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
/* Entry still waiting for its duration, or NULL */
static struct event_log_entry *event_open;
/* event_log_num counts entries since the last watchdog_start();
   watchdog_active is set while an alarm() armed by watchdog_start() is pending */
static int event_log_pos, event_log_num, watchdog_active;
/* Monotonic time of the latest io_update_time() call */
static btime last_time;
/* last_time at the start of the current I/O loop cycle */
static btime loop_time;
 1908: 
 1909: static void
 1910: io_update_time(void)
 1911: {
 1912:   struct timespec ts;
 1913:   int rv;
 1914: 
 1915:   if (!clock_monotonic_available)
 1916:     return;
 1917: 
 1918:   /*
 1919:    * This is third time-tracking procedure (after update_times() above and
 1920:    * times_update() in BFD), dedicated to internal event log and latency
 1921:    * tracking. Hopefully, we consolidate these sometimes.
 1922:    */
 1923: 
 1924:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
 1925:   if (rv < 0)
 1926:     die("clock_gettime: %m");
 1927: 
 1928:   last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
 1929: 
 1930:   if (event_open)
 1931:   {
 1932:     event_open->duration = last_time - event_open->timestamp;
 1933: 
 1934:     if (event_open->duration > config->latency_limit)
 1935:       log(L_WARN "Event 0x%p 0x%p took %d ms",
 1936: 	  event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
 1937: 
 1938:     event_open = NULL;
 1939:   }
 1940: }
 1941: 
 1942: /**
 1943:  * io_log_event - mark approaching event into event log
 1944:  * @hook: event hook address
 1945:  * @data: event data address
 1946:  *
 1947:  * Store info (hook, data, timestamp) about the following internal event into
 1948:  * a circular event log (@event_log). When latency tracking is enabled, the log
 1949:  * entry is kept open (in @event_open) so the duration can be filled later.
 1950:  */
 1951: void
 1952: io_log_event(void *hook, void *data)
 1953: {
 1954:   if (config->latency_debug)
 1955:     io_update_time();
 1956: 
 1957:   struct event_log_entry *en = event_log + event_log_pos;
 1958: 
 1959:   en->hook = hook;
 1960:   en->data = data;
 1961:   en->timestamp = last_time;
 1962:   en->duration = 0;
 1963: 
 1964:   event_log_num++;
 1965:   event_log_pos++;
 1966:   event_log_pos %= EVENT_LOG_LENGTH;
 1967: 
 1968:   event_open = config->latency_debug ? en : NULL;
 1969: }
 1970: 
/*
 * Close the currently open event log entry, if any: io_update_time()
 * refreshes last_time, stores the entry's duration and clears event_open.
 */
static inline void
io_close_event(void)
{
  if (event_open)
    io_update_time();
}
 1977: 
 1978: void
 1979: io_log_dump(void)
 1980: {
 1981:   int i;
 1982: 
 1983:   log(L_DEBUG "Event log:");
 1984:   for (i = 0; i < EVENT_LOG_LENGTH; i++)
 1985:   {
 1986:     struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
 1987:     if (en->hook)
 1988:       log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
 1989: 	  (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
 1990:   }
 1991: }
 1992: 
/*
 * Handler for the alarm armed by watchdog_start(): the I/O loop cycle did
 * not finish within config->watchdog_timeout.
 *
 * Brings last_time and the duration of the open event log entry up to date
 * and then aborts the process. The latency limit is raised to its maximum
 * beforehand so that io_update_time() does not emit a latency warning.
 */
void
watchdog_sigalrm(int sig UNUSED)
{
  /* Update last_time and duration, but skip latency check */
  config->latency_limit = 0xffffffff;
  io_update_time();

  /* We want core dump */
  abort();
}
 2003: 
/*
 * Initial variant of watchdog_start() used once before io_loop() enters its
 * loop: only records the start time of the cycle. It neither resets the
 * event counter nor arms the alarm.
 */
static inline void
watchdog_start1(void)
{
  io_update_time();

  loop_time = last_time;
}
 2011: 
 2012: static inline void
 2013: watchdog_start(void)
 2014: {
 2015:   io_update_time();
 2016: 
 2017:   loop_time = last_time;
 2018:   event_log_num = 0;
 2019: 
 2020:   if (config->watchdog_timeout)
 2021:   {
 2022:     alarm(config->watchdog_timeout);
 2023:     watchdog_active = 1;
 2024:   }
 2025: }
 2026: 
 2027: static inline void
 2028: watchdog_stop(void)
 2029: {
 2030:   io_update_time();
 2031: 
 2032:   if (watchdog_active)
 2033:   {
 2034:     alarm(0);
 2035:     watchdog_active = 0;
 2036:   }
 2037: 
 2038:   btime duration = last_time - loop_time;
 2039:   if (duration > config->watchdog_warning)
 2040:     log(L_WARN "I/O loop cycle took %d ms for %d events",
 2041: 	(int) (duration TO_MS), event_log_num);
 2042: }
 2043: 
 2044: 
 2045: /*
 2046:  *	Main I/O Loop
 2047:  */
 2048: 
/*
 * Requests raised asynchronously (set outside this chunk) and served by
 * io_loop(): when a flag is non-zero, the loop calls the matching handler
 * (async_config(), async_dump(), async_shutdown()) and clears the flag.
 */
volatile int async_config_flag;		/* Asynchronous reconfiguration/dump scheduled */
volatile int async_dump_flag;
volatile int async_shutdown_flag;
 2052: 
/*
 * io_init - initialize the I/O subsystem
 *
 * Sets up the timer lists, the socket list and the global event list,
 * initializes the kernel route I/O (krt_io_init()) and the clocks, records
 * the boot time and seeds random() from the real-time clock.
 */
void
io_init(void)
{
  init_list(&near_timers);
  init_list(&far_timers);
  init_list(&sock_list);
  init_list(&global_event_list);
  krt_io_init();
  init_times();
  /* now and now_real are read just below, right after this refresh */
  update_times();
  boot_time = now;
  srandom((int) now_real);
}
 2066: 
/*
 * Number of consecutive io_loop() iterations that skipped the second
 * (slow RX / error) socket pass because events were pending; once it
 * reaches SHORT_LOOP_MAX the pass is run regardless and the counter resets.
 */
static int short_loops = 0;
#define SHORT_LOOP_MAX 10
 2069: 
/*
 * io_loop - the main loop of the daemon; never returns
 *
 * Each iteration
 *   1. runs the global event list and fires all timers that are due,
 *   2. builds the pollfd array from sock_list (POLLIN for sockets with an
 *      rx_hook, POLLOUT for sockets with a tx_hook and pending TX data),
 *   3. serves asynchronous requests (reconfigure, dump, shutdown),
 *   4. calls poll() with the watchdog suspended,
 *   5. first pass: fast-RX reads and all writes, at most MAX_STEPS calls
 *      per socket and direction,
 *   6. second pass: remaining reads and error reports, at most
 *      MAX_RX_STEPS reads in total, resuming where the previous second
 *      pass stopped (stored_sock).
 *
 * Hooks may free sockets; sk_free() then moves current_sock on, which is
 * why every hook call is followed by a check of "s != current_sock".
 */
void
io_loop(void)
{
  int poll_tout;
  time_t tout;
  int nfds, events, pout;
  sock *s;
  node *n;
  int fdmax = 256;
  struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));

  watchdog_start1();
  for(;;)
    {
      events = ev_run_list(&global_event_list);
    timers:
      update_times();
      tout = tm_first_shot();
      if (tout <= now)
	{
	  tm_shot();
	  goto timers;
	}
      /* Do not sleep when events are pending; otherwise wait at most 3 s */
      poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */

      io_close_event();

      /* Build the pollfd array; s->index maps each socket to its slot (-1 = not polled) */
      nfds = 0;
      WALK_LIST(n, sock_list)
	{
	  pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
	  s = SKIP_BACK(sock, n, n);
	  if (s->rx_hook)
	    {
	      pfd[nfds].fd = s->fd;
	      pfd[nfds].events |= POLLIN;
	    }
	  if (s->tx_hook && s->ttx != s->tpos)
	    {
	      pfd[nfds].fd = s->fd;
	      pfd[nfds].events |= POLLOUT;
	    }
	  if (pfd[nfds].fd != -1)
	    {
	      s->index = nfds;
	      nfds++;
	    }
	  else
	    s->index = -1;

	  /* Grow the array so that slot nfds is valid for the next socket */
	  if (nfds >= fdmax)
	    {
	      fdmax *= 2;
	      pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
	    }
	}

      /*
       * Yes, this is racy. But even if the signal comes before this test
       * and entering poll(), it gets caught on the next timer tick.
       */

      if (async_config_flag)
	{
	  io_log_event(async_config, NULL);
	  async_config();
	  async_config_flag = 0;
	  continue;
	}
      if (async_dump_flag)
	{
	  io_log_event(async_dump, NULL);
	  async_dump();
	  async_dump_flag = 0;
	  continue;
	}
      if (async_shutdown_flag)
	{
	  io_log_event(async_shutdown, NULL);
	  async_shutdown();
	  async_shutdown_flag = 0;
	  continue;
	}

      /* And finally enter poll() to find active sockets */
      watchdog_stop();
      pout = poll(pfd, nfds, poll_tout);
      watchdog_start();

      if (pout < 0)
	{
	  if (errno == EINTR || errno == EAGAIN)
	    continue;
	  die("poll: %m");
	}
      if (pout)
	{
	  /* First pass: fast RX and TX for every polled socket */
	  /* guaranteed to be non-empty */
	  current_sock = SKIP_BACK(sock, n, HEAD(sock_list));

	  while (current_sock)
	    {
	      sock *s = current_sock;
	      if (s->index == -1)
		{
		  current_sock = sk_next(s);
		  goto next;
		}

	      int e;
	      int steps;

	      steps = MAX_STEPS;
	      if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
		do
		  {
		    steps--;
		    io_log_event(s->rx_hook, s->data);
		    e = sk_read(s, pfd[s->index].revents);
		    /* The hook freed the socket; current_sock already points to the next one */
		    if (s != current_sock)
		      goto next;
		  }
		while (e && s->rx_hook && steps);

	      steps = MAX_STEPS;
	      if (pfd[s->index].revents & POLLOUT)
		do
		  {
		    steps--;
		    io_log_event(s->tx_hook, s->data);
		    e = sk_write(s);
		    if (s != current_sock)
		      goto next;
		  }
		while (e && steps);

	      current_sock = sk_next(s);
	    next: ;
	    }

	  /* While events keep arriving, skip the second pass up to SHORT_LOOP_MAX - 1 times in a row */
	  short_loops++;
	  if (events && (short_loops < SHORT_LOOP_MAX))
	    continue;
	  short_loops = 0;

	  /* Second pass: ordinary RX and error reports, resuming at stored_sock */
	  int count = 0;
	  current_sock = stored_sock;
	  if (current_sock == NULL)
	    current_sock = SKIP_BACK(sock, n, HEAD(sock_list));

	  while (current_sock && count < MAX_RX_STEPS)
	    {
	      sock *s = current_sock;
	      if (s->index == -1)
		{
		  current_sock = sk_next(s);
		  goto next2;
		}

	      if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
		{
		  count++;
		  io_log_event(s->rx_hook, s->data);
		  sk_read(s, pfd[s->index].revents);
		  if (s != current_sock)
		    goto next2;
		}

	      if (pfd[s->index].revents & (POLLHUP | POLLERR))
		{
		  sk_err(s, pfd[s->index].revents);
		  if (s != current_sock)
		    goto next2;
		}

	      current_sock = sk_next(s);
	    next2: ;
	    }


	  /* NULL when the whole list was served; otherwise the socket to resume at */
	  stored_sock = current_sock;
	}
    }
}
 2254: 
 2255: void
 2256: test_old_bird(char *path)
 2257: {
 2258:   int fd;
 2259:   struct sockaddr_un sa;
 2260: 
 2261:   fd = socket(AF_UNIX, SOCK_STREAM, 0);
 2262:   if (fd < 0)
 2263:     die("Cannot create socket: %m");
 2264:   if (strlen(path) >= sizeof(sa.sun_path))
 2265:     die("Socket path too long");
 2266:   bzero(&sa, sizeof(sa));
 2267:   sa.sun_family = AF_UNIX;
 2268:   strcpy(sa.sun_path, path);
 2269:   if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
 2270:     die("I found another BIRD running.");
 2271:   close(fd);
 2272: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>