Annotation of embedaddon/bird/sysdep/unix/io.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  *     BIRD Internet Routing Daemon -- Unix I/O
                      3:  *
                      4:  *     (c) 1998--2004 Martin Mares <mj@ucw.cz>
                      5:  *      (c) 2004       Ondrej Filip <feela@network.cz>
                      6:  *
                      7:  *     Can be freely distributed and used under the terms of the GNU GPL.
                      8:  */
                      9: 
                     10: /* Unfortunately, some glibc versions hide parts of RFC 3542 API
                     11:    if _GNU_SOURCE is not defined. */
                     12: #ifndef _GNU_SOURCE
                     13: #define _GNU_SOURCE
                     14: #endif
                     15: 
                     16: #include <stdio.h>
                     17: #include <stdlib.h>
                     18: #include <time.h>
                     19: #include <sys/time.h>
                     20: #include <sys/types.h>
                     21: #include <sys/socket.h>
                     22: #include <sys/uio.h>
                     23: #include <sys/un.h>
                     24: #include <poll.h>
                     25: #include <unistd.h>
                     26: #include <fcntl.h>
                     27: #include <errno.h>
                     28: #include <net/if.h>
                     29: #include <netinet/in.h>
                     30: #include <netinet/tcp.h>
                     31: #include <netinet/udp.h>
                     32: #include <netinet/icmp6.h>
                     33: 
                     34: #include "nest/bird.h"
                     35: #include "lib/lists.h"
                     36: #include "lib/resource.h"
                     37: #include "lib/timer.h"
                     38: #include "lib/socket.h"
                     39: #include "lib/event.h"
                     40: #include "lib/string.h"
                     41: #include "nest/iface.h"
                     42: 
                     43: #include "lib/unix.h"
                     44: #include "lib/sysio.h"
                     45: 
                     46: /* Maximum number of calls of tx handler for one socket in one
                     47:  * poll iteration. Should be small enough to not monopolize CPU by
                     48:  * one protocol instance.
                     49:  */
                     50: #define MAX_STEPS 4
                     51: 
                     52: /* Maximum number of calls of rx handler for all sockets in one poll
                     53:    iteration. RX callbacks are often much more costly so we limit
                     54:    this to get small latencies */
                     55: #define MAX_RX_STEPS 4
                     56: 
                     57: /*
                     58:  *     Tracked Files
                     59:  */
                     60: 
/* A stdio stream wrapped as a resource. */
struct rfile {
  resource r;			/* Resource header; rf_free() and rf_dump() cast a resource pointer
				   straight to struct rfile, so this has to stay the first member */
  FILE *f;			/* The wrapped stream; rf_free() closes it */
};
                     65: 
                     66: static void
                     67: rf_free(resource *r)
                     68: {
                     69:   struct rfile *a = (struct rfile *) r;
                     70: 
                     71:   fclose(a->f);
                     72: }
                     73: 
                     74: static void
                     75: rf_dump(resource *r)
                     76: {
                     77:   struct rfile *a = (struct rfile *) r;
                     78: 
                     79:   debug("(FILE *%p)\n", a->f);
                     80: }
                     81: 
/*
 * Resource class used by tracked_fopen(). The initializer is positional;
 * presumably the slots are class name, instance size, free hook and dump
 * hook, followed by two unused hooks -- confirm against struct resclass
 * in lib/resource.h.
 */
static struct resclass rf_class = {
  "FILE",
  sizeof(struct rfile),
  rf_free,
  rf_dump,
  NULL,
  NULL
};
                     90: 
                     91: void *
                     92: tracked_fopen(pool *p, char *name, char *mode)
                     93: {
                     94:   FILE *f = fopen(name, mode);
                     95: 
                     96:   if (f)
                     97:     {
                     98:       struct rfile *r = ralloc(p, &rf_class);
                     99:       r->f = f;
                    100:     }
                    101:   return f;
                    102: }
                    103: 
                    104: /**
                    105:  * DOC: Timers
                    106:  *
                    107:  * Timers are resources which represent a wish of a module to call
                    108:  * a function at the specified time. The platform dependent code
                    109:  * doesn't guarantee exact timing, only that a timer function
                    110:  * won't be called before the requested time.
                    111:  *
                    112:  * In BIRD, time is represented by values of the &bird_clock_t type
                    113:  * which are integral numbers interpreted as a relative number of seconds since
                    114:  * some fixed time point in past. The current time can be read
                    115:  * from variable @now with reasonable accuracy and is monotonic. There is also
                    116:  * a current 'absolute' time in variable @now_real reported by OS.
                    117:  *
                    118:  * Each timer is described by a &timer structure containing a pointer
                    119:  * to the handler function (@hook), data private to this function (@data),
                    120:  * time the function should be called at (@expires, 0 for inactive timers),
                    121:  * for the other fields see |timer.h|.
                    122:  */
                    123: 
/* tm_start() puts timers due within this many seconds directly on the near list */
#define NEAR_TIMER_LIMIT 4

/* near_timers is kept ordered by expiry (see tm_insert_near()); far_timers is appended to unordered */
static list near_timers, far_timers;
/* Lower bound on the earliest expiry in far_timers; tm_shot() rescans the far list once it is reached */
static bird_clock_t first_far_timer = TIME_INFINITY;

/* now must be different from 0, because 0 is a special value in timer->expires */
bird_clock_t now = 1, now_real, boot_time;
                    131: 
                    132: static void
                    133: update_times_plain(void)
                    134: {
                    135:   bird_clock_t new_time = time(NULL);
                    136:   int delta = new_time - now_real;
                    137: 
                    138:   if ((delta >= 0) && (delta < 60))
                    139:     now += delta;
                    140:   else if (now_real != 0)
                    141:    log(L_WARN "Time jump, delta %d s", delta);
                    142: 
                    143:   now_real = new_time;
                    144: }
                    145: 
                    146: static void
                    147: update_times_gettime(void)
                    148: {
                    149:   struct timespec ts;
                    150:   int rv;
                    151: 
                    152:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
                    153:   if (rv != 0)
                    154:     die("clock_gettime: %m");
                    155: 
                    156:   if (ts.tv_sec != now) {
                    157:     if (ts.tv_sec < now)
                    158:       log(L_ERR "Monotonic timer is broken");
                    159: 
                    160:     now = ts.tv_sec;
                    161:     now_real = time(NULL);
                    162:   }
                    163: }
                    164: 
                    165: static int clock_monotonic_available;
                    166: 
                    167: static inline void
                    168: update_times(void)
                    169: {
                    170:   if (clock_monotonic_available)
                    171:     update_times_gettime();
                    172:   else
                    173:     update_times_plain();
                    174: }
                    175: 
                    176: static inline void
                    177: init_times(void)
                    178: {
                    179:  struct timespec ts;
                    180:  clock_monotonic_available = (clock_gettime(CLOCK_MONOTONIC, &ts) == 0);
                    181:  if (!clock_monotonic_available)
                    182:    log(L_WARN "Monotonic timer is missing");
                    183: }
                    184: 
                    185: 
                    186: static void
                    187: tm_free(resource *r)
                    188: {
                    189:   timer *t = (timer *) r;
                    190: 
                    191:   tm_stop(t);
                    192: }
                    193: 
                    194: static void
                    195: tm_dump(resource *r)
                    196: {
                    197:   timer *t = (timer *) r;
                    198: 
                    199:   debug("(code %p, data %p, ", t->hook, t->data);
                    200:   if (t->randomize)
                    201:     debug("rand %d, ", t->randomize);
                    202:   if (t->recurrent)
                    203:     debug("recur %d, ", t->recurrent);
                    204:   if (t->expires)
                    205:     debug("expires in %d sec)\n", t->expires - now);
                    206:   else
                    207:     debug("inactive)\n");
                    208: }
                    209: 
/*
 * Resource class used by tm_new(). The initializer is positional;
 * presumably the slots are class name, instance size, free hook and dump
 * hook, followed by two unused hooks -- confirm against struct resclass
 * in lib/resource.h.
 */
static struct resclass tm_class = {
  "Timer",
  sizeof(timer),
  tm_free,
  tm_dump,
  NULL,
  NULL
};
                    218: 
                    219: /**
                    220:  * tm_new - create a timer
                    221:  * @p: pool
                    222:  *
                    223:  * This function creates a new timer resource and returns
                    224:  * a pointer to it. To use the timer, you need to fill in
                    225:  * the structure fields and call tm_start() to start timing.
                    226:  */
                    227: timer *
                    228: tm_new(pool *p)
                    229: {
                    230:   timer *t = ralloc(p, &tm_class);
                    231:   return t;
                    232: }
                    233: 
                    234: static inline void
                    235: tm_insert_near(timer *t)
                    236: {
                    237:   node *n = HEAD(near_timers);
                    238: 
                    239:   while (n->next && (SKIP_BACK(timer, n, n)->expires < t->expires))
                    240:     n = n->next;
                    241:   insert_node(&t->n, n->prev);
                    242: }
                    243: 
                    244: /**
                    245:  * tm_start - start a timer
                    246:  * @t: timer
                    247:  * @after: number of seconds the timer should be run after
                    248:  *
                    249:  * This function schedules the hook function of the timer to
                    250:  * be called after @after seconds. If the timer has been already
                    251:  * started, its @expires time is replaced by the new value.
                    252:  *
                    253:  * If you have set the @randomize field of @t, the timeout
                    254:  * will be increased by a random number of seconds chosen
                    255:  * uniformly from range 0 .. @randomize.
                    256:  *
                    257:  * You can call tm_start() from the handler function of the timer
                    258:  * to request another run of the timer. Also, you can set the @recurrent
                    259:  * field to have the timer re-added automatically with the same timeout.
                    260:  */
                    261: void
                    262: tm_start(timer *t, unsigned after)
                    263: {
                    264:   bird_clock_t when;
                    265: 
                    266:   if (t->randomize)
                    267:     after += random() % (t->randomize + 1);
                    268:   when = now + after;
                    269:   if (t->expires == when)
                    270:     return;
                    271:   if (t->expires)
                    272:     rem_node(&t->n);
                    273:   t->expires = when;
                    274:   if (after <= NEAR_TIMER_LIMIT)
                    275:     tm_insert_near(t);
                    276:   else
                    277:     {
                    278:       if (!first_far_timer || first_far_timer > when)
                    279:        first_far_timer = when;
                    280:       add_tail(&far_timers, &t->n);
                    281:     }
                    282: }
                    283: 
                    284: /**
                    285:  * tm_stop - stop a timer
                    286:  * @t: timer
                    287:  *
                    288:  * This function stops a timer. If the timer is already stopped,
                    289:  * nothing happens.
                    290:  */
                    291: void
                    292: tm_stop(timer *t)
                    293: {
                    294:   if (t->expires)
                    295:     {
                    296:       rem_node(&t->n);
                    297:       t->expires = 0;
                    298:     }
                    299: }
                    300: 
                    301: static void
                    302: tm_dump_them(char *name, list *l)
                    303: {
                    304:   node *n;
                    305:   timer *t;
                    306: 
                    307:   debug("%s timers:\n", name);
                    308:   WALK_LIST(n, *l)
                    309:     {
                    310:       t = SKIP_BACK(timer, n, n);
                    311:       debug("%p ", t);
                    312:       tm_dump(&t->r);
                    313:     }
                    314:   debug("\n");
                    315: }
                    316: 
/* Dump both timer lists to the debug output, the near list first. */
void
tm_dump_all(void)
{
  tm_dump_them("Near", &near_timers);
  tm_dump_them("Far", &far_timers);
}
                    323: 
                    324: static inline time_t
                    325: tm_first_shot(void)
                    326: {
                    327:   time_t x = first_far_timer;
                    328: 
                    329:   if (!EMPTY_LIST(near_timers))
                    330:     {
                    331:       timer *t = SKIP_BACK(timer, n, HEAD(near_timers));
                    332:       if (t->expires < x)
                    333:        x = t->expires;
                    334:     }
                    335:   return x;
                    336: }
                    337: 
                    338: void io_log_event(void *hook, void *data);
                    339: 
                    340: static void
                    341: tm_shot(void)
                    342: {
                    343:   timer *t;
                    344:   node *n, *m;
                    345: 
                    346:   if (first_far_timer <= now)
                    347:     {
                    348:       bird_clock_t limit = now + NEAR_TIMER_LIMIT;
                    349:       first_far_timer = TIME_INFINITY;
                    350:       n = HEAD(far_timers);
                    351:       while (m = n->next)
                    352:        {
                    353:          t = SKIP_BACK(timer, n, n);
                    354:          if (t->expires <= limit)
                    355:            {
                    356:              rem_node(n);
                    357:              tm_insert_near(t);
                    358:            }
                    359:          else if (t->expires < first_far_timer)
                    360:            first_far_timer = t->expires;
                    361:          n = m;
                    362:        }
                    363:     }
                    364:   while ((n = HEAD(near_timers)) -> next)
                    365:     {
                    366:       int delay;
                    367:       t = SKIP_BACK(timer, n, n);
                    368:       if (t->expires > now)
                    369:        break;
                    370:       rem_node(n);
                    371:       delay = t->expires - now;
                    372:       t->expires = 0;
                    373:       if (t->recurrent)
                    374:        {
                    375:          int i = t->recurrent - delay;
                    376:          if (i < 0)
                    377:            i = 0;
                    378:          tm_start(t, i);
                    379:        }
                    380:       io_log_event(t->hook, t->data);
                    381:       t->hook(t);
                    382:     }
                    383: }
                    384: 
                    385: /**
                    386:  * tm_parse_datetime - parse a date and time
                    387:  * @x: datetime string
                    388:  *
                    389:  * tm_parse_datetime() takes a textual representation of
                    390:  * a date and time (dd-mm-yyyy hh:mm:ss)
                    391:  * and converts it to the corresponding value of type &bird_clock_t.
                    392:  */
                    393: bird_clock_t
                    394: tm_parse_datetime(char *x)
                    395: {
                    396:   struct tm tm;
                    397:   int n;
                    398:   time_t t;
                    399: 
                    400:   if (sscanf(x, "%d-%d-%d %d:%d:%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &tm.tm_hour, &tm.tm_min, &tm.tm_sec, &n) != 6 || x[n])
                    401:     return tm_parse_date(x);
                    402:   tm.tm_mon--;
                    403:   tm.tm_year -= 1900;
                    404:   t = mktime(&tm);
                    405:   if (t == (time_t) -1)
                    406:     return 0;
                    407:   return t;
                    408: }
                    409: /**
                    410:  * tm_parse_date - parse a date
                    411:  * @x: date string
                    412:  *
                    413:  * tm_parse_date() takes a textual representation of a date (dd-mm-yyyy)
                    414:  * and converts it to the corresponding value of type &bird_clock_t.
                    415:  */
                    416: bird_clock_t
                    417: tm_parse_date(char *x)
                    418: {
                    419:   struct tm tm;
                    420:   int n;
                    421:   time_t t;
                    422: 
                    423:   if (sscanf(x, "%d-%d-%d%n", &tm.tm_mday, &tm.tm_mon, &tm.tm_year, &n) != 3 || x[n])
                    424:     return 0;
                    425:   tm.tm_mon--;
                    426:   tm.tm_year -= 1900;
                    427:   tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
                    428:   t = mktime(&tm);
                    429:   if (t == (time_t) -1)
                    430:     return 0;
                    431:   return t;
                    432: }
                    433: 
                    434: static void
                    435: tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta)
                    436: {
                    437:   static char *month_names[12] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                    438:                                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
                    439: 
                    440:   if (delta < 20*3600)
                    441:     bsprintf(x, "%02d:%02d", tm->tm_hour, tm->tm_min);
                    442:   else if (delta < 360*86400)
                    443:     bsprintf(x, "%s%02d", month_names[tm->tm_mon], tm->tm_mday);
                    444:   else
                    445:     bsprintf(x, "%d", tm->tm_year+1900);
                    446: }
                    447: 
                    448: #include "conf/conf.h"
                    449: 
                    450: /**
                    451:  * tm_format_datetime - convert date and time to textual representation
                    452:  * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE
                    453:  * @fmt_spec: specification of resulting textual representation of the time
                    454:  * @t: time
                    455:  *
                    456:  * This function formats the given relative time value @t to a textual
                    457:  * date/time representation (dd-mm-yyyy hh:mm:ss) in real time.
                    458:  */
                    459: void
                    460: tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t)
                    461: {
                    462:   const char *fmt_used;
                    463:   struct tm *tm;
                    464:   bird_clock_t delta = now - t;
                    465:   t = now_real - delta;
                    466:   tm = localtime(&t);
                    467: 
                    468:   if (fmt_spec->fmt1 == NULL)
                    469:     return tm_format_reltime(x, tm, delta);
                    470: 
                    471:   if ((fmt_spec->limit == 0) || (delta < fmt_spec->limit))
                    472:     fmt_used = fmt_spec->fmt1;
                    473:   else
                    474:     fmt_used = fmt_spec->fmt2;
                    475: 
                    476:   int rv = strftime(x, TM_DATETIME_BUFFER_SIZE, fmt_used, tm);
                    477:   if (((rv == 0) && fmt_used[0]) || (rv == TM_DATETIME_BUFFER_SIZE))
                    478:     strcpy(x, "<too-long>");
                    479: }
                    480: 
                    481: 
                    482: /**
                    483:  * DOC: Sockets
                    484:  *
                    485:  * Socket resources represent network connections. Their data structure (&socket)
                    486:  * contains a lot of fields defining the exact type of the socket, the local and
                    487:  * remote addresses and ports, pointers to socket buffers and finally pointers to
                    488:  * hook functions to be called when new data have arrived to the receive buffer
                    489:  * (@rx_hook), when the contents of the transmit buffer have been transmitted
                    490:  * (@tx_hook) and when an error or connection close occurs (@err_hook).
                    491:  *
                    492:  * Freeing of sockets from inside socket hooks is perfectly safe.
                    493:  */
                    494: 
/*
 * Fallbacks for systems whose headers do not provide the SOL_* socket
 * option levels: use the corresponding IPPROTO_* constants instead.
 */
#ifndef SOL_IP
#define SOL_IP IPPROTO_IP
#endif

#ifndef SOL_IPV6
#define SOL_IPV6 IPPROTO_IPV6
#endif

#ifndef SOL_ICMPV6
#define SOL_ICMPV6 IPPROTO_ICMPV6
#endif
                    506: 
                    507: 
                    508: /*
                    509:  *     Sockaddr helper functions
                    510:  */
                    511: 
                    512: static inline int UNUSED sockaddr_length(int af)
                    513: { return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
                    514: 
                    515: static inline void
                    516: sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
                    517: {
                    518:   memset(sa, 0, sizeof(struct sockaddr_in));
                    519: #ifdef HAVE_SIN_LEN
                    520:   sa->sin_len = sizeof(struct sockaddr_in);
                    521: #endif
                    522:   sa->sin_family = AF_INET;
                    523:   sa->sin_port = htons(port);
                    524:   sa->sin_addr = ipa_to_in4(a);
                    525: }
                    526: 
                    527: static inline void
                    528: sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
                    529: {
                    530:   memset(sa, 0, sizeof(struct sockaddr_in6));
                    531: #ifdef SIN6_LEN
                    532:   sa->sin6_len = sizeof(struct sockaddr_in6);
                    533: #endif
                    534:   sa->sin6_family = AF_INET6;
                    535:   sa->sin6_port = htons(port);
                    536:   sa->sin6_flowinfo = 0;
                    537:   sa->sin6_addr = ipa_to_in6(a);
                    538: 
                    539:   if (ifa && ipa_is_link_local(a))
                    540:     sa->sin6_scope_id = ifa->index;
                    541: }
                    542: 
                    543: void
                    544: sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
                    545: {
                    546:   if (af == AF_INET)
                    547:     sockaddr_fill4((struct sockaddr_in *) sa, a, port);
                    548:   else if (af == AF_INET6)
                    549:     sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
                    550:   else
                    551:     bug("Unknown AF");
                    552: }
                    553: 
                    554: static inline void
                    555: sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
                    556: {
                    557:   *port = ntohs(sa->sin_port);
                    558:   *a = ipa_from_in4(sa->sin_addr);
                    559: }
                    560: 
                    561: static inline void
                    562: sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
                    563: {
                    564:   *port = ntohs(sa->sin6_port);
                    565:   *a = ipa_from_in6(sa->sin6_addr);
                    566: 
                    567:   if (ifa && ipa_is_link_local(*a))
                    568:     *ifa = if_find_by_index(sa->sin6_scope_id);
                    569: }
                    570: 
                    571: int
                    572: sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
                    573: {
                    574:   if (sa->sa.sa_family != af)
                    575:     goto fail;
                    576: 
                    577:   if (af == AF_INET)
                    578:     sockaddr_read4((struct sockaddr_in *) sa, a, port);
                    579:   else if (af == AF_INET6)
                    580:     sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
                    581:   else
                    582:     goto fail;
                    583: 
                    584:   return 0;
                    585: 
                    586:  fail:
                    587:   *a = IPA_NONE;
                    588:   *port = 0;
                    589:   return -1;
                    590: }
                    591: 
                    592: 
                    593: /*
                    594:  *     IPv6 multicast syscalls
                    595:  */
                    596: 
                    597: /* Fortunately standardized in RFC 3493 */
                    598: 
                    599: #define INIT_MREQ6(maddr,ifa) \
                    600:   { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
                    601: 
                    602: static inline int
                    603: sk_setup_multicast6(sock *s)
                    604: {
                    605:   int index = s->iface->index;
                    606:   int ttl = s->ttl;
                    607:   int n = 0;
                    608: 
                    609:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
                    610:     ERR("IPV6_MULTICAST_IF");
                    611: 
                    612:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
                    613:     ERR("IPV6_MULTICAST_HOPS");
                    614: 
                    615:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
                    616:     ERR("IPV6_MULTICAST_LOOP");
                    617: 
                    618:   return 0;
                    619: }
                    620: 
                    621: static inline int
                    622: sk_join_group6(sock *s, ip_addr maddr)
                    623: {
                    624:   struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
                    625: 
                    626:   if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
                    627:     ERR("IPV6_JOIN_GROUP");
                    628: 
                    629:   return 0;
                    630: }
                    631: 
                    632: static inline int
                    633: sk_leave_group6(sock *s, ip_addr maddr)
                    634: {
                    635:   struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
                    636: 
                    637:   if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
                    638:     ERR("IPV6_LEAVE_GROUP");
                    639: 
                    640:   return 0;
                    641: }
                    642: 
                    643: 
                    644: /*
                    645:  *     IPv6 packet control messages
                    646:  */
                    647: 
                    648: /* Also standardized, in RFC 3542 */
                    649: 
                    650: /*
                    651:  * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
                    652:  * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
                    653:  * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
                    654:  * RFC and we use IPV6_PKTINFO.
                    655:  */
#ifndef IPV6_RECVPKTINFO
#define IPV6_RECVPKTINFO IPV6_PKTINFO
#endif
/*
 * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
 */
#ifndef IPV6_RECVHOPLIMIT
#define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
#endif


/*
 * Control-buffer space (as computed by CMSG_SPACE()) for one control
 * message carrying a struct in6_pktinfo, and for one carrying an int.
 */
#define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
#define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
                    669: 
                    670: static inline int
                    671: sk_request_cmsg6_pktinfo(sock *s)
                    672: {
                    673:   int y = 1;
                    674: 
                    675:   if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
                    676:     ERR("IPV6_RECVPKTINFO");
                    677: 
                    678:   return 0;
                    679: }
                    680: 
                    681: static inline int
                    682: sk_request_cmsg6_ttl(sock *s)
                    683: {
                    684:   int y = 1;
                    685: 
                    686:   if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
                    687:     ERR("IPV6_RECVHOPLIMIT");
                    688: 
                    689:   return 0;
                    690: }
                    691: 
                    692: static inline void
                    693: sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
                    694: {
                    695:   if (cm->cmsg_type == IPV6_PKTINFO)
                    696:   {
                    697:     struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
                    698:     s->laddr = ipa_from_in6(pi->ipi6_addr);
                    699:     s->lifindex = pi->ipi6_ifindex;
                    700:   }
                    701: }
                    702: 
                    703: static inline void
                    704: sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
                    705: {
                    706:   if (cm->cmsg_type == IPV6_HOPLIMIT)
                    707:     s->rcv_ttl = * (int *) CMSG_DATA(cm);
                    708: }
                    709: 
                    710: static inline void
                    711: sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
                    712: {
                    713:   struct cmsghdr *cm;
                    714:   struct in6_pktinfo *pi;
                    715:   int controllen = 0;
                    716: 
                    717:   msg->msg_control = cbuf;
                    718:   msg->msg_controllen = cbuflen;
                    719: 
                    720:   cm = CMSG_FIRSTHDR(msg);
                    721:   cm->cmsg_level = SOL_IPV6;
                    722:   cm->cmsg_type = IPV6_PKTINFO;
                    723:   cm->cmsg_len = CMSG_LEN(sizeof(*pi));
                    724:   controllen += CMSG_SPACE(sizeof(*pi));
                    725: 
                    726:   pi = (struct in6_pktinfo *) CMSG_DATA(cm);
                    727:   pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
                    728:   pi->ipi6_addr = ipa_to_in6(s->saddr);
                    729: 
                    730:   msg->msg_controllen = controllen;
                    731: }
                    732: 
                    733: 
                    734: /*
                    735:  *     Miscellaneous socket syscalls
                    736:  */
                    737: 
                    738: static inline int
                    739: sk_set_ttl4(sock *s, int ttl)
                    740: {
                    741:   if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
                    742:     ERR("IP_TTL");
                    743: 
                    744:   return 0;
                    745: }
                    746: 
                    747: static inline int
                    748: sk_set_ttl6(sock *s, int ttl)
                    749: {
                    750:   if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
                    751:     ERR("IPV6_UNICAST_HOPS");
                    752: 
                    753:   return 0;
                    754: }
                    755: 
                    756: static inline int
                    757: sk_set_tos4(sock *s, int tos)
                    758: {
                    759:   if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
                    760:     ERR("IP_TOS");
                    761: 
                    762:   return 0;
                    763: }
                    764: 
                    765: static inline int
                    766: sk_set_tos6(sock *s, int tos)
                    767: {
                    768:   if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
                    769:     ERR("IPV6_TCLASS");
                    770: 
                    771:   return 0;
                    772: }
                    773: 
                    774: static inline int
                    775: sk_set_high_port(sock *s UNUSED)
                    776: {
                    777:   /* Port range setting is optional, ignore it if not supported */
                    778: 
                    779: #ifdef IP_PORTRANGE
                    780:   if (sk_is_ipv4(s))
                    781:   {
                    782:     int range = IP_PORTRANGE_HIGH;
                    783:     if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
                    784:       ERR("IP_PORTRANGE");
                    785:   }
                    786: #endif
                    787: 
                    788: #ifdef IPV6_PORTRANGE
                    789:   if (sk_is_ipv6(s))
                    790:   {
                    791:     int range = IPV6_PORTRANGE_HIGH;
                    792:     if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
                    793:       ERR("IPV6_PORTRANGE");
                    794:   }
                    795: #endif
                    796: 
                    797:   return 0;
                    798: }
                    799: 
                    800: static inline byte *
                    801: sk_skip_ip_header(byte *pkt, int *len)
                    802: {
                    803:   if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
                    804:     return NULL;
                    805: 
                    806:   int hlen = (*pkt & 0x0f) * 4;
                    807:   if ((hlen < 20) || (hlen > *len))
                    808:     return NULL;
                    809: 
                    810:   *len -= hlen;
                    811:   return pkt + hlen;
                    812: }
                    813: 
                    814: byte *
                    815: sk_rx_buffer(sock *s, int *len)
                    816: {
                    817:   if (sk_is_ipv4(s) && (s->type == SK_IP))
                    818:     return sk_skip_ip_header(s->rbuf, len);
                    819:   else
                    820:     return s->rbuf;
                    821: }
                    822: 
                    823: 
                    824: /*
                    825:  *     Public socket functions
                    826:  */
                    827: 
                    828: /**
                    829:  * sk_setup_multicast - enable multicast for given socket
                    830:  * @s: socket
                    831:  *
                    832:  * Prepare transmission of multicast packets for given datagram socket.
                    833:  * The socket must have defined @iface.
                    834:  *
                    835:  * Result: 0 for success, -1 for an error.
                    836:  */
                    837: 
                    838: int
                    839: sk_setup_multicast(sock *s)
                    840: {
                    841:   ASSERT(s->iface);
                    842: 
                    843:   if (sk_is_ipv4(s))
                    844:     return sk_setup_multicast4(s);
                    845:   else
                    846:     return sk_setup_multicast6(s);
                    847: }
                    848: 
                    849: /**
                    850:  * sk_join_group - join multicast group for given socket
                    851:  * @s: socket
                    852:  * @maddr: multicast address
                    853:  *
                    854:  * Join multicast group for given datagram socket and associated interface.
                    855:  * The socket must have defined @iface.
                    856:  *
                    857:  * Result: 0 for success, -1 for an error.
                    858:  */
                    859: 
                    860: int
                    861: sk_join_group(sock *s, ip_addr maddr)
                    862: {
                    863:   if (sk_is_ipv4(s))
                    864:     return sk_join_group4(s, maddr);
                    865:   else
                    866:     return sk_join_group6(s, maddr);
                    867: }
                    868: 
                    869: /**
                    870:  * sk_leave_group - leave multicast group for given socket
                    871:  * @s: socket
                    872:  * @maddr: multicast address
                    873:  *
                    874:  * Leave multicast group for given datagram socket and associated interface.
                    875:  * The socket must have defined @iface.
                    876:  *
                    877:  * Result: 0 for success, -1 for an error.
                    878:  */
                    879: 
                    880: int
                    881: sk_leave_group(sock *s, ip_addr maddr)
                    882: {
                    883:   if (sk_is_ipv4(s))
                    884:     return sk_leave_group4(s, maddr);
                    885:   else
                    886:     return sk_leave_group6(s, maddr);
                    887: }
                    888: 
                    889: /**
                    890:  * sk_setup_broadcast - enable broadcast for given socket
                    891:  * @s: socket
                    892:  *
                    893:  * Allow reception and transmission of broadcast packets for given datagram
                    894:  * socket. The socket must have defined @iface. For transmission, packets should
                    895:  * be send to @brd address of @iface.
                    896:  *
                    897:  * Result: 0 for success, -1 for an error.
                    898:  */
                    899: 
                    900: int
                    901: sk_setup_broadcast(sock *s)
                    902: {
                    903:   int y = 1;
                    904: 
                    905:   if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
                    906:     ERR("SO_BROADCAST");
                    907: 
                    908:   return 0;
                    909: }
                    910: 
                    911: /**
                    912:  * sk_set_ttl - set transmit TTL for given socket
                    913:  * @s: socket
                    914:  * @ttl: TTL value
                    915:  *
                    916:  * Set TTL for already opened connections when TTL was not set before. Useful
                    917:  * for accepted connections when different ones should have different TTL.
                    918:  *
                    919:  * Result: 0 for success, -1 for an error.
                    920:  */
                    921: 
                    922: int
                    923: sk_set_ttl(sock *s, int ttl)
                    924: {
                    925:   s->ttl = ttl;
                    926: 
                    927:   if (sk_is_ipv4(s))
                    928:     return sk_set_ttl4(s, ttl);
                    929:   else
                    930:     return sk_set_ttl6(s, ttl);
                    931: }
                    932: 
                    933: /**
                    934:  * sk_set_min_ttl - set minimal accepted TTL for given socket
                    935:  * @s: socket
                    936:  * @ttl: TTL value
                    937:  *
                    938:  * Set minimal accepted TTL for given socket. Can be used for TTL security.
                    939:  * implementations.
                    940:  *
                    941:  * Result: 0 for success, -1 for an error.
                    942:  */
                    943: 
                    944: int
                    945: sk_set_min_ttl(sock *s, int ttl)
                    946: {
                    947:   if (sk_is_ipv4(s))
                    948:     return sk_set_min_ttl4(s, ttl);
                    949:   else
                    950:     return sk_set_min_ttl6(s, ttl);
                    951: }
                    952: 
                    953: #if 0
                    954: /**
                    955:  * sk_set_md5_auth - add / remove MD5 security association for given socket
                    956:  * @s: socket
                    957:  * @local: IP address of local side
                    958:  * @remote: IP address of remote side
                    959:  * @ifa: Interface for link-local IP address
                    960:  * @passwd: Password used for MD5 authentication
                    961:  * @setkey: Update also system SA/SP database
                    962:  *
                    963:  * In TCP MD5 handling code in kernel, there is a set of security associations
                    964:  * used for choosing password and other authentication parameters according to
                    965:  * the local and remote address. This function is useful for listening socket,
                    966:  * for active sockets it may be enough to set s->password field.
                    967:  *
                    968:  * When called with passwd != NULL, the new pair is added,
                    969:  * When called with passwd == NULL, the existing pair is removed.
                    970:  *
                    971:  * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
                    972:  * stored in global SA/SP database (but the behavior also must be enabled on
                    973:  * per-socket basis). In case of multiple sockets to the same neighbor, the
                    974:  * socket-specific state must be configured for each socket while global state
                    975:  * just once per src-dst pair. The @setkey argument controls whether the global
                    976:  * state (SA/SP database) is also updated.
                    977:  *
                    978:  * Result: 0 for success, -1 for an error.
                    979:  */
                    980: 
                    981: int
                    982: sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
                    983: { DUMMY; }
                    984: #endif
                    985: 
                    986: /**
                    987:  * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
                    988:  * @s: socket
                    989:  * @offset: offset
                    990:  *
                    991:  * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
                    992:  * kernel will automatically fill it for outgoing packets and check it for
                    993:  * incoming packets. Should not be used on ICMPv6 sockets, where the position is
                    994:  * known to the kernel.
                    995:  *
                    996:  * Result: 0 for success, -1 for an error.
                    997:  */
                    998: 
                    999: int
                   1000: sk_set_ipv6_checksum(sock *s, int offset)
                   1001: {
                   1002:   if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
                   1003:     ERR("IPV6_CHECKSUM");
                   1004: 
                   1005:   return 0;
                   1006: }
                   1007: 
                   1008: int
                   1009: sk_set_icmp6_filter(sock *s, int p1, int p2)
                   1010: {
                   1011:   /* a bit of lame interface, but it is here only for Radv */
                   1012:   struct icmp6_filter f;
                   1013: 
                   1014:   ICMP6_FILTER_SETBLOCKALL(&f);
                   1015:   ICMP6_FILTER_SETPASS(p1, &f);
                   1016:   ICMP6_FILTER_SETPASS(p2, &f);
                   1017: 
                   1018:   if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
                   1019:     ERR("ICMP6_FILTER");
                   1020: 
                   1021:   return 0;
                   1022: }
                   1023: 
                   1024: void
                   1025: sk_log_error(sock *s, const char *p)
                   1026: {
                   1027:   log(L_ERR "%s: Socket error: %s%#m", p, s->err);
                   1028: }
                   1029: 
                   1030: 
                   1031: /*
                   1032:  *     Actual struct birdsock code
                   1033:  */
                   1034: 
                   1035: static list sock_list;	/* sockets are appended here by sk_insert() */
                   1036: static struct birdsock *current_sock;	/* moved to the next socket by sk_free() if it points at the freed one */
                   1037: static struct birdsock *stored_sock;	/* same adjustment in sk_free() as current_sock */
                   1038: 
                   1039: static inline sock *
                   1040: sk_next(sock *s)
                   1041: {
                   1042:   if (!s->n.next->next)
                   1043:     return NULL;
                   1044:   else
                   1045:     return SKIP_BACK(sock, n, s->n.next);
                   1046: }
                   1047: 
                   1048: static void
                   1049: sk_alloc_bufs(sock *s)
                   1050: {
                   1051:   if (!s->rbuf && s->rbsize)
                   1052:     s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
                   1053:   s->rpos = s->rbuf;
                   1054:   if (!s->tbuf && s->tbsize)
                   1055:     s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
                   1056:   s->tpos = s->ttx = s->tbuf;
                   1057: }
                   1058: 
                   1059: static void
                   1060: sk_free_bufs(sock *s)
                   1061: {
                   1062:   if (s->rbuf_alloc)
                   1063:   {
                   1064:     xfree(s->rbuf_alloc);
                   1065:     s->rbuf = s->rbuf_alloc = NULL;
                   1066:   }
                   1067:   if (s->tbuf_alloc)
                   1068:   {
                   1069:     xfree(s->tbuf_alloc);
                   1070:     s->tbuf = s->tbuf_alloc = NULL;
                   1071:   }
                   1072: }
                   1073: 
                   1074: static void
                   1075: sk_free(resource *r)
                   1076: {
                   1077:   sock *s = (sock *) r;
                   1078: 
                   1079:   sk_free_bufs(s);
                   1080:   if (s->fd >= 0)
                   1081:   {
                   1082:     close(s->fd);
                   1083: 
                   1084:     /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
                   1085:     if (s->flags & SKF_THREAD)
                   1086:       return;
                   1087: 
                   1088:     if (s == current_sock)
                   1089:       current_sock = sk_next(s);
                   1090:     if (s == stored_sock)
                   1091:       stored_sock = sk_next(s);
                   1092:     rem_node(&s->n);
                   1093:   }
                   1094: }
                   1095: 
                   1096: void
                   1097: sk_set_rbsize(sock *s, uint val)
                   1098: {
                   1099:   ASSERT(s->rbuf_alloc == s->rbuf);
                   1100: 
                   1101:   if (s->rbsize == val)
                   1102:     return;
                   1103: 
                   1104:   s->rbsize = val;
                   1105:   xfree(s->rbuf_alloc);
                   1106:   s->rbuf_alloc = xmalloc(val);
                   1107:   s->rpos = s->rbuf = s->rbuf_alloc;
                   1108: }
                   1109: 
                   1110: void
                   1111: sk_set_tbsize(sock *s, uint val)
                   1112: {
                   1113:   ASSERT(s->tbuf_alloc == s->tbuf);
                   1114: 
                   1115:   if (s->tbsize == val)
                   1116:     return;
                   1117: 
                   1118:   byte *old_tbuf = s->tbuf;
                   1119: 
                   1120:   s->tbsize = val;
                   1121:   s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
                   1122:   s->tpos = s->tbuf + (s->tpos - old_tbuf);
                   1123:   s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
                   1124: }
                   1125: 
                   1126: void
                   1127: sk_set_tbuf(sock *s, void *tbuf)
                   1128: {
                   1129:   s->tbuf = tbuf ?: s->tbuf_alloc;
                   1130:   s->ttx = s->tpos = s->tbuf;
                   1131: }
                   1132: 
                   1133: void
                   1134: sk_reallocate(sock *s)
                   1135: {
                   1136:   sk_free_bufs(s);
                   1137:   sk_alloc_bufs(s);
                   1138: }
                   1139: 
                   1140: static void
                   1141: sk_dump(resource *r)
                   1142: {
                   1143:   sock *s = (sock *) r;
                   1144:   static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "DEL!" };
                   1145: 
                   1146:   debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
                   1147:        sk_type_names[s->type],
                   1148:        s->data,
                   1149:        s->saddr,
                   1150:        s->sport,
                   1151:        s->daddr,
                   1152:        s->dport,
                   1153:        s->tos,
                   1154:        s->ttl,
                   1155:        s->iface ? s->iface->name : "none");
                   1156: }
                   1157: 
                   1158: static struct resclass sk_class = {	/* resource class handed to ralloc() by sock_new() */
                   1159:   "Socket",
                   1160:   sizeof(sock),
                   1161:   sk_free,	/* closes the descriptor and unlinks the socket */
                   1162:   sk_dump,	/* prints a one-line description via debug() */
                   1163:   NULL,
                   1164:   NULL
                   1165: };
                   1166: 
                   1167: /**
                   1168:  * sk_new - create a socket
                   1169:  * @p: pool
                   1170:  *
                   1171:  * This function creates a new socket resource. If you want to use it,
                   1172:  * you need to fill in all the required fields of the structure and
                   1173:  * call sk_open() to do the actual opening of the socket.
                   1174:  *
                   1175:  * The real function name is sock_new(), sk_new() is a macro wrapper
                   1176:  * to avoid collision with OpenSSL.
                   1177:  */
                   1178: sock *
                   1179: sock_new(pool *p)
                   1180: {
                   1181:   sock *s = ralloc(p, &sk_class);
                   1182:   s->pool = p;
                   1183:   // s->saddr = s->daddr = IPA_NONE;
                   1184:   s->tos = s->priority = s->ttl = -1;
                   1185:   s->fd = -1;
                   1186:   return s;
                   1187: }
                   1188: 
                   1189: static int
                   1190: sk_setup(sock *s)
                   1191: {
                   1192:   int y = 1;
                   1193:   int fd = s->fd;
                   1194: 
                   1195:   if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
                   1196:     ERR("O_NONBLOCK");
                   1197: 
                   1198:   if (!s->af)
                   1199:     return 0;
                   1200: 
                   1201:   if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
                   1202:     s->flags |= SKF_PKTINFO;
                   1203: 
                   1204: #ifdef CONFIG_USE_HDRINCL
                   1205:   if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
                   1206:   {
                   1207:     s->flags &= ~SKF_PKTINFO;
                   1208:     s->flags |= SKF_HDRINCL;
                   1209:     if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
                   1210:       ERR("IP_HDRINCL");
                   1211:   }
                   1212: #endif
                   1213: 
                   1214:   if (s->iface)
                   1215:   {
                   1216: #ifdef SO_BINDTODEVICE
                   1217:     struct ifreq ifr = {};
                   1218:     strcpy(ifr.ifr_name, s->iface->name);
                   1219:     if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
                   1220:       ERR("SO_BINDTODEVICE");
                   1221: #endif
                   1222: 
                   1223: #ifdef CONFIG_UNIX_DONTROUTE
                   1224:     if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
                   1225:       ERR("SO_DONTROUTE");
                   1226: #endif
                   1227:   }
                   1228: 
                   1229:   if (s->priority >= 0)
                   1230:     if (sk_set_priority(s, s->priority) < 0)
                   1231:       return -1;
                   1232: 
                   1233:   if (sk_is_ipv4(s))
                   1234:   {
                   1235:     if (s->flags & SKF_LADDR_RX)
                   1236:       if (sk_request_cmsg4_pktinfo(s) < 0)
                   1237:        return -1;
                   1238: 
                   1239:     if (s->flags & SKF_TTL_RX)
                   1240:       if (sk_request_cmsg4_ttl(s) < 0)
                   1241:        return -1;
                   1242: 
                   1243:     if ((s->type == SK_UDP) || (s->type == SK_IP))
                   1244:       if (sk_disable_mtu_disc4(s) < 0)
                   1245:        return -1;
                   1246: 
                   1247:     if (s->ttl >= 0)
                   1248:       if (sk_set_ttl4(s, s->ttl) < 0)
                   1249:        return -1;
                   1250: 
                   1251:     if (s->tos >= 0)
                   1252:       if (sk_set_tos4(s, s->tos) < 0)
                   1253:        return -1;
                   1254:   }
                   1255: 
                   1256:   if (sk_is_ipv6(s))
                   1257:   {
                   1258:     if (s->flags & SKF_V6ONLY)
                   1259:       if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
                   1260:        ERR("IPV6_V6ONLY");
                   1261: 
                   1262:     if (s->flags & SKF_LADDR_RX)
                   1263:       if (sk_request_cmsg6_pktinfo(s) < 0)
                   1264:        return -1;
                   1265: 
                   1266:     if (s->flags & SKF_TTL_RX)
                   1267:       if (sk_request_cmsg6_ttl(s) < 0)
                   1268:        return -1;
                   1269: 
                   1270:     if ((s->type == SK_UDP) || (s->type == SK_IP))
                   1271:       if (sk_disable_mtu_disc6(s) < 0)
                   1272:        return -1;
                   1273: 
                   1274:     if (s->ttl >= 0)
                   1275:       if (sk_set_ttl6(s, s->ttl) < 0)
                   1276:        return -1;
                   1277: 
                   1278:     if (s->tos >= 0)
                   1279:       if (sk_set_tos6(s, s->tos) < 0)
                   1280:        return -1;
                   1281:   }
                   1282: 
                   1283:   return 0;
                   1284: }
                   1285: 
                   1286: static void
                   1287: sk_insert(sock *s)
                   1288: {
                   1289:   add_tail(&sock_list, &s->n);
                   1290: }
                   1291: 
                   1292: static void
                   1293: sk_tcp_connected(sock *s)
                   1294: {
                   1295:   sockaddr sa;
                   1296:   int sa_len = sizeof(sa);
                   1297: 
                   1298:   if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
                   1299:       (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
                   1300:     log(L_WARN "SOCK: Cannot get local IP address for TCP>");
                   1301: 
                   1302:   s->type = SK_TCP;
                   1303:   sk_alloc_bufs(s);
                   1304:   s->tx_hook(s);
                   1305: }
                   1306: 
                   1307: static int
                   1308: sk_passive_connected(sock *s, int type)
                   1309: {
                   1310:   sockaddr loc_sa, rem_sa;
                   1311:   int loc_sa_len = sizeof(loc_sa);
                   1312:   int rem_sa_len = sizeof(rem_sa);
                   1313: 
                   1314:   int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
                   1315:   if (fd < 0)
                   1316:   {
                   1317:     if ((errno != EINTR) && (errno != EAGAIN))
                   1318:       s->err_hook(s, errno);
                   1319:     return 0;
                   1320:   }
                   1321: 
                   1322:   sock *t = sk_new(s->pool);
                   1323:   t->type = type;
                   1324:   t->fd = fd;
                   1325:   t->af = s->af;
                   1326:   t->ttl = s->ttl;
                   1327:   t->tos = s->tos;
                   1328:   t->rbsize = s->rbsize;
                   1329:   t->tbsize = s->tbsize;
                   1330: 
                   1331:   if (type == SK_TCP)
                   1332:   {
                   1333:     if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
                   1334:        (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
                   1335:       log(L_WARN "SOCK: Cannot get local IP address for TCP<");
                   1336: 
                   1337:     if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
                   1338:       log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
                   1339:   }
                   1340: 
                   1341:   if (sk_setup(t) < 0)
                   1342:   {
                   1343:     /* FIXME: Call err_hook instead ? */
                   1344:     log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
                   1345: 
                   1346:     /* FIXME: handle it better in rfree() */
                   1347:     close(t->fd);
                   1348:     t->fd = -1;
                   1349:     rfree(t);
                   1350:     return 1;
                   1351:   }
                   1352: 
                   1353:   sk_insert(t);
                   1354:   sk_alloc_bufs(t);
                   1355:   s->rx_hook(t, 0);
                   1356:   return 1;
                   1357: }
                   1358: 
                   1359: /**
                   1360:  * sk_open - open a socket
                   1361:  * @s: socket
                   1362:  *
                   1363:  * This function takes a socket resource created by sk_new() and
                   1364:  * initialized by the user and binds a corresponding network connection
                   1365:  * to it.
                   1366:  *
                   1367:  * Result: 0 for success, -1 for an error.
                   1368:  */
                   1369: int
                   1370: sk_open(sock *s)
                   1371: {
                   1372:   int af = BIRD_AF;
                   1373:   int fd = -1;
                   1374:   int do_bind = 0;
                   1375:   int bind_port = 0;
                   1376:   ip_addr bind_addr = IPA_NONE;
                   1377:   sockaddr sa;
                   1378: 
                   1379:   switch (s->type)
                   1380:   {
                   1381:   case SK_TCP_ACTIVE:
                   1382:     s->ttx = "";                       /* Force s->ttx != s->tpos */
                   1383:     /* Fall thru */
                   1384:   case SK_TCP_PASSIVE:
                   1385:     fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
                   1386:     bind_port = s->sport;
                   1387:     bind_addr = s->saddr;
                   1388:     do_bind = bind_port || ipa_nonzero(bind_addr);
                   1389:     break;
                   1390: 
                   1391:   case SK_UDP:
                   1392:     fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
                   1393:     bind_port = s->sport;
                   1394:     bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
                   1395:     do_bind = 1;
                   1396:     break;
                   1397: 
                   1398:   case SK_IP:
                   1399:     fd = socket(af, SOCK_RAW, s->dport);
                   1400:     bind_port = 0;
                   1401:     bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
                   1402:     do_bind = ipa_nonzero(bind_addr);
                   1403:     break;
                   1404: 
                   1405:   case SK_MAGIC:
                   1406:     af = 0;
                   1407:     fd = s->fd;
                   1408:     break;
                   1409: 
                   1410:   default:
                   1411:     bug("sk_open() called for invalid sock type %d", s->type);
                   1412:   }
                   1413: 
                   1414:   if (fd < 0)
                   1415:     ERR("socket");
                   1416: 
                   1417:   s->af = af;
                   1418:   s->fd = fd;
                   1419: 
                   1420:   if (sk_setup(s) < 0)
                   1421:     goto err;
                   1422: 
                   1423:   if (do_bind)
                   1424:   {
                   1425:     if (bind_port)
                   1426:     {
                   1427:       int y = 1;
                   1428: 
                   1429:       if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
                   1430:        ERR2("SO_REUSEADDR");
                   1431: 
                   1432: #ifdef CONFIG_NO_IFACE_BIND
                   1433:       /* Workaround missing ability to bind to an iface */
                   1434:       if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
                   1435:       {
                   1436:        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
                   1437:          ERR2("SO_REUSEPORT");
                   1438:       }
                   1439: #endif
                   1440:     }
                   1441:     else
                   1442:       if (s->flags & SKF_HIGH_PORT)
                   1443:        if (sk_set_high_port(s) < 0)
                   1444:          log(L_WARN "Socket error: %s%#m", s->err);
                   1445: 
                   1446:     sockaddr_fill(&sa, af, bind_addr, s->iface, bind_port);
                   1447:     if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
                   1448:       ERR2("bind");
                   1449:   }
                   1450: 
                   1451:   if (s->password)
                   1452:     if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
                   1453:       goto err;
                   1454: 
                   1455:   switch (s->type)
                   1456:   {
                   1457:   case SK_TCP_ACTIVE:
                   1458:     sockaddr_fill(&sa, af, s->daddr, s->iface, s->dport);
                   1459:     if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
                   1460:       sk_tcp_connected(s);
                   1461:     else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
                   1462:             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
                   1463:       ERR2("connect");
                   1464:     break;
                   1465: 
                   1466:   case SK_TCP_PASSIVE:
                   1467:     if (listen(fd, 8) < 0)
                   1468:       ERR2("listen");
                   1469:     break;
                   1470: 
                   1471:   case SK_MAGIC:
                   1472:     break;
                   1473: 
                   1474:   default:
                   1475:     sk_alloc_bufs(s);
                   1476:   }
                   1477: 
                   1478:   if (!(s->flags & SKF_THREAD))
                   1479:     sk_insert(s);
                   1480:   return 0;
                   1481: 
                   1482: err:
                   1483:   close(fd);
                   1484:   s->fd = -1;
                   1485:   return -1;
                   1486: }
                   1487: 
                   1488: int
                   1489: sk_open_unix(sock *s, char *name)
                   1490: {
                   1491:   struct sockaddr_un sa;
                   1492:   int fd;
                   1493: 
                   1494:   /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
                   1495: 
                   1496:   fd = socket(AF_UNIX, SOCK_STREAM, 0);
                   1497:   if (fd < 0)
                   1498:     return -1;
                   1499: 
                   1500:   if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
                   1501:     return -1;
                   1502: 
                   1503:   /* Path length checked in test_old_bird() */
                   1504:   sa.sun_family = AF_UNIX;
                   1505:   strcpy(sa.sun_path, name);
                   1506: 
                   1507:   if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
                   1508:     return -1;
                   1509: 
                   1510:   if (listen(fd, 8) < 0)
                   1511:     return -1;
                   1512: 
                   1513:   s->fd = fd;
                   1514:   sk_insert(s);
                   1515:   return 0;
                   1516: }
                   1517: 
                   1518: 
/*
 * Sizes of the on-stack control-message (ancillary data) buffers used by
 * sk_recvmsg() and sk_sendmsg(). Each takes the larger of the IPv4 and IPv6
 * requirement: the RX buffer holds packet info plus TTL, the TX buffer holds
 * packet info only.
 */
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
                          CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
                   1522: 
                   1523: static void
                   1524: sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
                   1525: {
                   1526:   if (sk_is_ipv4(s))
                   1527:     sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
                   1528:   else
                   1529:     sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
                   1530: }
                   1531: 
                   1532: static void
                   1533: sk_process_cmsgs(sock *s, struct msghdr *msg)
                   1534: {
                   1535:   struct cmsghdr *cm;
                   1536: 
                   1537:   s->laddr = IPA_NONE;
                   1538:   s->lifindex = 0;
                   1539:   s->rcv_ttl = -1;
                   1540: 
                   1541:   for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
                   1542:   {
                   1543:     if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
                   1544:     {
                   1545:       sk_process_cmsg4_pktinfo(s, cm);
                   1546:       sk_process_cmsg4_ttl(s, cm);
                   1547:     }
                   1548: 
                   1549:     if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
                   1550:     {
                   1551:       sk_process_cmsg6_pktinfo(s, cm);
                   1552:       sk_process_cmsg6_ttl(s, cm);
                   1553:     }
                   1554:   }
                   1555: }
                   1556: 
                   1557: 
                   1558: static inline int
                   1559: sk_sendmsg(sock *s)
                   1560: {
                   1561:   struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
                   1562:   byte cmsg_buf[CMSG_TX_SPACE];
                   1563:   sockaddr dst;
                   1564: 
                   1565:   sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
                   1566: 
                   1567:   struct msghdr msg = {
                   1568:     .msg_name = &dst.sa,
                   1569:     .msg_namelen = SA_LEN(dst),
                   1570:     .msg_iov = &iov,
                   1571:     .msg_iovlen = 1
                   1572:   };
                   1573: 
                   1574: #ifdef CONFIG_USE_HDRINCL
                   1575:   byte hdr[20];
                   1576:   struct iovec iov2[2] = { {hdr, 20}, iov };
                   1577: 
                   1578:   if (s->flags & SKF_HDRINCL)
                   1579:   {
                   1580:     sk_prepare_ip_header(s, hdr, iov.iov_len);
                   1581:     msg.msg_iov = iov2;
                   1582:     msg.msg_iovlen = 2;
                   1583:   }
                   1584: #endif
                   1585: 
                   1586:   if (s->flags & SKF_PKTINFO)
                   1587:     sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
                   1588: 
                   1589:   return sendmsg(s->fd, &msg, 0);
                   1590: }
                   1591: 
                   1592: static inline int
                   1593: sk_recvmsg(sock *s)
                   1594: {
                   1595:   struct iovec iov = {s->rbuf, s->rbsize};
                   1596:   byte cmsg_buf[CMSG_RX_SPACE];
                   1597:   sockaddr src;
                   1598: 
                   1599:   struct msghdr msg = {
                   1600:     .msg_name = &src.sa,
                   1601:     .msg_namelen = sizeof(src), // XXXX ??
                   1602:     .msg_iov = &iov,
                   1603:     .msg_iovlen = 1,
                   1604:     .msg_control = cmsg_buf,
                   1605:     .msg_controllen = sizeof(cmsg_buf),
                   1606:     .msg_flags = 0
                   1607:   };
                   1608: 
                   1609:   int rv = recvmsg(s->fd, &msg, 0);
                   1610:   if (rv < 0)
                   1611:     return rv;
                   1612: 
                   1613:   //ifdef IPV4
                   1614:   //  if (cf_type == SK_IP)
                   1615:   //    rv = ipv4_skip_header(pbuf, rv);
                   1616:   //endif
                   1617: 
                   1618:   sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
                   1619:   sk_process_cmsgs(s, &msg);
                   1620: 
                   1621:   if (msg.msg_flags & MSG_TRUNC)
                   1622:     s->flags |= SKF_TRUNCATED;
                   1623:   else
                   1624:     s->flags &= ~SKF_TRUNCATED;
                   1625: 
                   1626:   return rv;
                   1627: }
                   1628: 
                   1629: 
                   1630: static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
                   1631: 
                   1632: static int
                   1633: sk_maybe_write(sock *s)
                   1634: {
                   1635:   int e;
                   1636: 
                   1637:   switch (s->type)
                   1638:   {
                   1639:   case SK_TCP:
                   1640:   case SK_MAGIC:
                   1641:   case SK_UNIX:
                   1642:     while (s->ttx != s->tpos)
                   1643:     {
                   1644:       e = write(s->fd, s->ttx, s->tpos - s->ttx);
                   1645: 
                   1646:       if (e < 0)
                   1647:       {
                   1648:        if (errno != EINTR && errno != EAGAIN)
                   1649:        {
                   1650:          reset_tx_buffer(s);
                   1651:          /* EPIPE is just a connection close notification during TX */
                   1652:          s->err_hook(s, (errno != EPIPE) ? errno : 0);
                   1653:          return -1;
                   1654:        }
                   1655:        return 0;
                   1656:       }
                   1657:       s->ttx += e;
                   1658:     }
                   1659:     reset_tx_buffer(s);
                   1660:     return 1;
                   1661: 
                   1662:   case SK_UDP:
                   1663:   case SK_IP:
                   1664:     {
                   1665:       if (s->tbuf == s->tpos)
                   1666:        return 1;
                   1667: 
                   1668:       e = sk_sendmsg(s);
                   1669: 
                   1670:       if (e < 0)
                   1671:       {
                   1672:        if (errno != EINTR && errno != EAGAIN)
                   1673:        {
                   1674:          reset_tx_buffer(s);
                   1675:          s->err_hook(s, errno);
                   1676:          return -1;
                   1677:        }
                   1678: 
                   1679:        if (!s->tx_hook)
                   1680:          reset_tx_buffer(s);
                   1681:        return 0;
                   1682:       }
                   1683:       reset_tx_buffer(s);
                   1684:       return 1;
                   1685:     }
                   1686:   default:
                   1687:     bug("sk_maybe_write: unknown socket type %d", s->type);
                   1688:   }
                   1689: }
                   1690: 
                   1691: int
                   1692: sk_rx_ready(sock *s)
                   1693: {
                   1694:   int rv;
                   1695:   struct pollfd pfd = { .fd = s->fd };
                   1696:   pfd.events |= POLLIN;
                   1697: 
                   1698:  redo:
                   1699:   rv = poll(&pfd, 1, 0);
                   1700: 
                   1701:   if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
                   1702:     goto redo;
                   1703: 
                   1704:   return rv;
                   1705: }
                   1706: 
                   1707: /**
                   1708:  * sk_send - send data to a socket
                   1709:  * @s: socket
                   1710:  * @len: number of bytes to send
                   1711:  *
                   1712:  * This function sends @len bytes of data prepared in the
                   1713:  * transmit buffer of the socket @s to the network connection.
                   1714:  * If the packet can be sent immediately, it does so and returns
                   1715:  * 1, else it queues the packet for later processing, returns 0
                   1716:  * and calls the @tx_hook of the socket when the tranmission
                   1717:  * takes place.
                   1718:  */
                   1719: int
                   1720: sk_send(sock *s, unsigned len)
                   1721: {
                   1722:   s->ttx = s->tbuf;
                   1723:   s->tpos = s->tbuf + len;
                   1724:   return sk_maybe_write(s);
                   1725: }
                   1726: 
                   1727: /**
                   1728:  * sk_send_to - send data to a specific destination
                   1729:  * @s: socket
                   1730:  * @len: number of bytes to send
                   1731:  * @addr: IP address to send the packet to
                   1732:  * @port: port to send the packet to
                   1733:  *
                   1734:  * This is a sk_send() replacement for connection-less packet sockets
                   1735:  * which allows destination of the packet to be chosen dynamically.
                   1736:  * Raw IP sockets should use 0 for @port.
                   1737:  */
                   1738: int
                   1739: sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
                   1740: {
                   1741:   s->daddr = addr;
                   1742:   if (port)
                   1743:     s->dport = port;
                   1744: 
                   1745:   s->ttx = s->tbuf;
                   1746:   s->tpos = s->tbuf + len;
                   1747:   return sk_maybe_write(s);
                   1748: }
                   1749: 
                   1750: /*
                   1751: int
                   1752: sk_send_full(sock *s, unsigned len, struct iface *ifa,
                   1753:             ip_addr saddr, ip_addr daddr, unsigned dport)
                   1754: {
                   1755:   s->iface = ifa;
                   1756:   s->saddr = saddr;
                   1757:   s->daddr = daddr;
                   1758:   s->dport = dport;
                   1759:   s->ttx = s->tbuf;
                   1760:   s->tpos = s->tbuf + len;
                   1761:   return sk_maybe_write(s);
                   1762: }
                   1763: */
                   1764: 
                   1765:  /* sk_read() and sk_write() are called from BFD's event loop */
                   1766: 
                   1767: int
                   1768: sk_read(sock *s, int revents)
                   1769: {
                   1770:   switch (s->type)
                   1771:   {
                   1772:   case SK_TCP_PASSIVE:
                   1773:     return sk_passive_connected(s, SK_TCP);
                   1774: 
                   1775:   case SK_UNIX_PASSIVE:
                   1776:     return sk_passive_connected(s, SK_UNIX);
                   1777: 
                   1778:   case SK_TCP:
                   1779:   case SK_UNIX:
                   1780:     {
                   1781:       int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
                   1782: 
                   1783:       if (c < 0)
                   1784:       {
                   1785:        if (errno != EINTR && errno != EAGAIN)
                   1786:          s->err_hook(s, errno);
                   1787:        else if (errno == EAGAIN && !(revents & POLLIN))
                   1788:        {
                   1789:          log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
                   1790:          s->err_hook(s, 0);
                   1791:        }
                   1792:       }
                   1793:       else if (!c)
                   1794:        s->err_hook(s, 0);
                   1795:       else
                   1796:       {
                   1797:        s->rpos += c;
                   1798:        if (s->rx_hook(s, s->rpos - s->rbuf))
                   1799:        {
                   1800:          /* We need to be careful since the socket could have been deleted by the hook */
                   1801:          if (current_sock == s)
                   1802:            s->rpos = s->rbuf;
                   1803:        }
                   1804:        return 1;
                   1805:       }
                   1806:       return 0;
                   1807:     }
                   1808: 
                   1809:   case SK_MAGIC:
                   1810:     return s->rx_hook(s, 0);
                   1811: 
                   1812:   default:
                   1813:     {
                   1814:       int e = sk_recvmsg(s);
                   1815: 
                   1816:       if (e < 0)
                   1817:       {
                   1818:        if (errno != EINTR && errno != EAGAIN)
                   1819:          s->err_hook(s, errno);
                   1820:        return 0;
                   1821:       }
                   1822: 
                   1823:       s->rpos = s->rbuf + e;
                   1824:       s->rx_hook(s, e);
                   1825:       return 1;
                   1826:     }
                   1827:   }
                   1828: }
                   1829: 
                   1830: int
                   1831: sk_write(sock *s)
                   1832: {
                   1833:   switch (s->type)
                   1834:   {
                   1835:   case SK_TCP_ACTIVE:
                   1836:     {
                   1837:       sockaddr sa;
                   1838:       sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
                   1839: 
                   1840:       if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
                   1841:        sk_tcp_connected(s);
                   1842:       else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
                   1843:        s->err_hook(s, errno);
                   1844:       return 0;
                   1845:     }
                   1846: 
                   1847:   default:
                   1848:     if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
                   1849:     {
                   1850:       if (s->tx_hook)
                   1851:        s->tx_hook(s);
                   1852:       return 1;
                   1853:     }
                   1854:     return 0;
                   1855:   }
                   1856: }
                   1857: 
                   1858: void
                   1859: sk_err(sock *s, int revents)
                   1860: {
                   1861:   int se = 0, sse = sizeof(se);
                   1862:   if ((s->type != SK_MAGIC) && (revents & POLLERR))
                   1863:     if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
                   1864:     {
                   1865:       log(L_ERR "IO: Socket error: SO_ERROR: %m");
                   1866:       se = 0;
                   1867:     }
                   1868: 
                   1869:   s->err_hook(s, se);
                   1870: }
                   1871: 
                   1872: void
                   1873: sk_dump_all(void)
                   1874: {
                   1875:   node *n;
                   1876:   sock *s;
                   1877: 
                   1878:   debug("Open sockets:\n");
                   1879:   WALK_LIST(n, sock_list)
                   1880:   {
                   1881:     s = SKIP_BACK(sock, n, n);
                   1882:     debug("%p ", s);
                   1883:     sk_dump(&s->r);
                   1884:   }
                   1885:   debug("\n");
                   1886: }
                   1887: 
                   1888: 
/*
 *	Internal event log and watchdog
 */

/* Number of slots in the circular event log */
#define EVENT_LOG_LENGTH 32

/* One record of the circular event log, filled by io_log_event() */
struct event_log_entry
{
  void *hook;                   /* Event hook address; NULL marks a never-used slot */
  void *data;                   /* Event data address */
  btime timestamp;              /* Value of last_time when the event was logged */
  btime duration;               /* Filled by io_update_time() while the entry is open, else 0 */
};

static struct event_log_entry event_log[EVENT_LOG_LENGTH];
/* Entry whose duration is still to be filled in, or NULL */
static struct event_log_entry *event_open;
/*
 * event_log_pos: slot the next event will be written to
 * event_log_num: events logged since watchdog_start() reset the counter
 * watchdog_active: non-zero while an alarm() armed by watchdog_start() is pending
 */
static int event_log_pos, event_log_num, watchdog_active;
/* Most recent monotonic time sampled by io_update_time() (presumably in microseconds -- confirm against the S macro) */
static btime last_time;
/* Value of last_time recorded by watchdog_start() / watchdog_start1() */
static btime loop_time;
                   1908: 
                   1909: static void
                   1910: io_update_time(void)
                   1911: {
                   1912:   struct timespec ts;
                   1913:   int rv;
                   1914: 
                   1915:   if (!clock_monotonic_available)
                   1916:     return;
                   1917: 
                   1918:   /*
                   1919:    * This is third time-tracking procedure (after update_times() above and
                   1920:    * times_update() in BFD), dedicated to internal event log and latency
                   1921:    * tracking. Hopefully, we consolidate these sometimes.
                   1922:    */
                   1923: 
                   1924:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
                   1925:   if (rv < 0)
                   1926:     die("clock_gettime: %m");
                   1927: 
                   1928:   last_time = ((s64) ts.tv_sec S) + (ts.tv_nsec / 1000);
                   1929: 
                   1930:   if (event_open)
                   1931:   {
                   1932:     event_open->duration = last_time - event_open->timestamp;
                   1933: 
                   1934:     if (event_open->duration > config->latency_limit)
                   1935:       log(L_WARN "Event 0x%p 0x%p took %d ms",
                   1936:          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
                   1937: 
                   1938:     event_open = NULL;
                   1939:   }
                   1940: }
                   1941: 
                   1942: /**
                   1943:  * io_log_event - mark approaching event into event log
                   1944:  * @hook: event hook address
                   1945:  * @data: event data address
                   1946:  *
                   1947:  * Store info (hook, data, timestamp) about the following internal event into
                   1948:  * a circular event log (@event_log). When latency tracking is enabled, the log
                   1949:  * entry is kept open (in @event_open) so the duration can be filled later.
                   1950:  */
                   1951: void
                   1952: io_log_event(void *hook, void *data)
                   1953: {
                   1954:   if (config->latency_debug)
                   1955:     io_update_time();
                   1956: 
                   1957:   struct event_log_entry *en = event_log + event_log_pos;
                   1958: 
                   1959:   en->hook = hook;
                   1960:   en->data = data;
                   1961:   en->timestamp = last_time;
                   1962:   en->duration = 0;
                   1963: 
                   1964:   event_log_num++;
                   1965:   event_log_pos++;
                   1966:   event_log_pos %= EVENT_LOG_LENGTH;
                   1967: 
                   1968:   event_open = config->latency_debug ? en : NULL;
                   1969: }
                   1970: 
                   1971: static inline void
                   1972: io_close_event(void)
                   1973: {
                   1974:   if (event_open)
                   1975:     io_update_time();
                   1976: }
                   1977: 
                   1978: void
                   1979: io_log_dump(void)
                   1980: {
                   1981:   int i;
                   1982: 
                   1983:   log(L_DEBUG "Event log:");
                   1984:   for (i = 0; i < EVENT_LOG_LENGTH; i++)
                   1985:   {
                   1986:     struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
                   1987:     if (en->hook)
                   1988:       log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
                   1989:          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
                   1990:   }
                   1991: }
                   1992: 
/*
 * SIGALRM handler for the watchdog armed by watchdog_start(): records the
 * final timing of the currently open event and aborts the process.
 * The signal number is unused.
 */
void
watchdog_sigalrm(int sig UNUSED)
{
  /* Update last_time and duration, but skip latency check */
  /* (raising the limit first keeps io_update_time() from logging a warning) */
  config->latency_limit = 0xffffffff;
  io_update_time();

  /* We want core dump */
  abort();
}
                   2003: 
/*
 * Reduced variant of watchdog_start(): only records the start time of the
 * loop cycle in loop_time. It neither resets the event counter nor arms
 * the alarm.
 */
static inline void
watchdog_start1(void)
{
  io_update_time();

  loop_time = last_time;
}
                   2011: 
                   2012: static inline void
                   2013: watchdog_start(void)
                   2014: {
                   2015:   io_update_time();
                   2016: 
                   2017:   loop_time = last_time;
                   2018:   event_log_num = 0;
                   2019: 
                   2020:   if (config->watchdog_timeout)
                   2021:   {
                   2022:     alarm(config->watchdog_timeout);
                   2023:     watchdog_active = 1;
                   2024:   }
                   2025: }
                   2026: 
                   2027: static inline void
                   2028: watchdog_stop(void)
                   2029: {
                   2030:   io_update_time();
                   2031: 
                   2032:   if (watchdog_active)
                   2033:   {
                   2034:     alarm(0);
                   2035:     watchdog_active = 0;
                   2036:   }
                   2037: 
                   2038:   btime duration = last_time - loop_time;
                   2039:   if (duration > config->watchdog_warning)
                   2040:     log(L_WARN "I/O loop cycle took %d ms for %d events",
                   2041:        (int) (duration TO_MS), event_log_num);
                   2042: }
                   2043: 
                   2044: 
/*
 *	Main I/O Loop
 */

/*
 * Flags requesting asynchronous actions. They are volatile, presumably
 * because they are set from signal handlers and polled by the main loop --
 * TODO confirm against the code that sets them.
 */
volatile int async_config_flag;		/* Asynchronous reconfiguration/dump scheduled */
volatile int async_dump_flag;		/* NOTE(review): presumably an asynchronous dump request */
volatile int async_shutdown_flag;	/* NOTE(review): presumably an asynchronous shutdown request */
                   2052: 
/*
 * Initialize the I/O subsystem: set up the timer, socket and global event
 * lists, run the kernel-interface I/O initialization, start timekeeping,
 * record the boot time and seed the random number generator.
 */
void
io_init(void)
{
  init_list(&near_timers);
  init_list(&far_timers);
  init_list(&sock_list);
  init_list(&global_event_list);
  krt_io_init();
  init_times();
  /* update_times() must run before 'now' and 'now_real' are read below */
  update_times();
  boot_time = now;
  srandom((int) now_real);
}
                   2066: 
/*
 * NOTE(review): counter and limit presumably used by io_loop() to bound
 * consecutive "short" loop iterations -- confirm at the use site.
 */
static int short_loops = 0;
#define SHORT_LOOP_MAX 10
                   2069: 
                         /*
                          * io_loop - the main loop; it contains no break or return statement.
                          *
                          * One cycle of the loop:
                          *   1. call ev_run_list() on global_event_list and keep its result in events,
                          *   2. call tm_shot() until tm_first_shot() reports a time later than now,
                          *   3. rebuild the pollfd array from sock_list,
                          *   4. serve at most one pending async_*_flag request and restart the cycle,
                          *   5. poll(), then call socket hooks in two passes: fast_rx reads and all
                          *      writes first, ordinary reads and POLLHUP/POLLERR reports second.
                          */
                   2070: void
                   2071: io_loop(void)
                   2072: {
                   2073:   int poll_tout;
                   2074:   time_t tout;
                   2075:   int nfds, events, pout;
                   2076:   sock *s;
                   2077:   node *n;
                           /* The pollfd array starts with 256 slots and is doubled on demand below */
                   2078:   int fdmax = 256;
                   2079:   struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
                   2080: 
                   2081:   watchdog_start1();
                   2082:   for(;;)
                   2083:     {
                   2084:       events = ev_run_list(&global_event_list);
                   2085:     timers:
                               /* Re-read the clock and keep shooting while the first timer is not in the future */
                   2086:       update_times();
                   2087:       tout = tm_first_shot();
                   2088:       if (tout <= now)
                   2089:        {
                   2090:          tm_shot();
                   2091:          goto timers;
                   2092:        }
                               /* Do not block at all when events is non-zero; otherwise wait for the
                                  first timer, but never longer than 3 (scaled by 1000 to milliseconds) */
                   2093:       poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */
                   2094: 
                   2095:       io_close_event();
                   2096: 
                               /*
                                * Build the pollfd array. A socket gets a slot only if it has an rx_hook
                                * (POLLIN) or a tx_hook with ttx != tpos (POLLOUT); its slot number is
                                * stored in s->index. Sockets with neither get index -1 and the slot is
                                * reused for the next socket.
                                */
                   2097:       nfds = 0;
                   2098:       WALK_LIST(n, sock_list)
                   2099:        {
                   2100:          pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
                   2101:          s = SKIP_BACK(sock, n, n);
                   2102:          if (s->rx_hook)
                   2103:            {
                   2104:              pfd[nfds].fd = s->fd;
                   2105:              pfd[nfds].events |= POLLIN;
                   2106:            }
                   2107:          if (s->tx_hook && s->ttx != s->tpos)
                   2108:            {
                   2109:              pfd[nfds].fd = s->fd;
                   2110:              pfd[nfds].events |= POLLOUT;
                   2111:            }
                   2112:          if (pfd[nfds].fd != -1)
                   2113:            {
                   2114:              s->index = nfds;
                   2115:              nfds++;
                   2116:            }
                   2117:          else
                   2118:            s->index = -1;
                   2119: 
                                  /* pfd[nfds] is written at the top of the next iteration, so grow
                                     the array as soon as nfds reaches its current capacity */
                   2120:          if (nfds >= fdmax)
                   2121:            {
                   2122:              fdmax *= 2;
                   2123:              pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
                   2124:            }
                   2125:        }
                   2126: 
                   2127:       /*
                   2128:        * Yes, this is racy. But even if the signal comes before this test
                   2129:        * and entering poll(), it gets caught on the next timer tick.
                   2130:        */
                   2131: 
                               /* At most one request is served per cycle: each branch ends with
                                  continue, which restarts the cycle without entering poll() */
                   2132:       if (async_config_flag)
                   2133:        {
                   2134:          io_log_event(async_config, NULL);
                   2135:          async_config();
                   2136:          async_config_flag = 0;
                   2137:          continue;
                   2138:        }
                   2139:       if (async_dump_flag)
                   2140:        {
                   2141:          io_log_event(async_dump, NULL);
                   2142:          async_dump();
                   2143:          async_dump_flag = 0;
                   2144:          continue;
                   2145:        }
                   2146:       if (async_shutdown_flag)
                   2147:        {
                   2148:          io_log_event(async_shutdown, NULL);
                   2149:          async_shutdown();
                   2150:          async_shutdown_flag = 0;
                   2151:          continue;
                   2152:        }
                   2153: 
                   2154:       /* And finally enter poll() to find active sockets */
                               /* NOTE(review): the watchdog looks to be paused for the time spent
                                  blocked in poll() -- confirm in watchdog_stop()/watchdog_start() */
                   2155:       watchdog_stop();
                   2156:       pout = poll(pfd, nfds, poll_tout);
                   2157:       watchdog_start();
                   2158: 
                   2159:       if (pout < 0)
                   2160:        {
                                  /* EINTR and EAGAIN just restart the cycle; any other error goes to die() */
                   2161:          if (errno == EINTR || errno == EAGAIN)
                   2162:            continue;
                   2163:          die("poll: %m");
                   2164:        }
                   2165:       if (pout)
                   2166:        {
                   2167:          /* guaranteed to be non-empty */
                   2168:          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
                   2169: 
                                  /*
                                   * First pass: for every socket that has a pollfd slot, run the
                                   * rx hook if the socket is marked fast_rx, then the tx hook.
                                   * Each direction is limited to MAX_STEPS calls per socket.
                                   */
                   2170:          while (current_sock)
                   2171:            {
                   2172:              sock *s = current_sock;
                   2173:              if (s->index == -1)
                   2174:                {
                   2175:                  current_sock = sk_next(s);
                   2176:                  goto next;
                   2177:                }
                   2178: 
                   2179:              int e;
                   2180:              int steps;
                   2181: 
                   2182:              steps = MAX_STEPS;
                   2183:              if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
                   2184:                do
                   2185:                  {
                   2186:                    steps--;
                   2187:                    io_log_event(s->rx_hook, s->data);
                   2188:                    e = sk_read(s, pfd[s->index].revents);
                                            /* current_sock was changed during the call (presumably the
                                               socket went away -- confirm); s is not touched again and
                                               the walk continues from the new current_sock */
                   2189:                    if (s != current_sock)
                   2190:                      goto next;
                   2191:                  }
                   2192:                while (e && s->rx_hook && steps);
                   2193: 
                   2194:              steps = MAX_STEPS;
                   2195:              if (pfd[s->index].revents & POLLOUT)
                   2196:                do
                   2197:                  {
                   2198:                    steps--;
                   2199:                    io_log_event(s->tx_hook, s->data);
                   2200:                    e = sk_write(s);
                   2201:                    if (s != current_sock)
                   2202:                      goto next;
                   2203:                  }
                   2204:                while (e && steps);
                   2205: 
                   2206:              current_sock = sk_next(s);
                   2207:            next: ;
                   2208:            }
                   2209: 
                                  /* While ev_run_list() reported work, skip the second pass, but
                                     for fewer than SHORT_LOOP_MAX consecutive times */
                   2210:          short_loops++;
                   2211:          if (events && (short_loops < SHORT_LOOP_MAX))
                   2212:            continue;
                   2213:          short_loops = 0;
                   2214: 
                                  /*
                                   * Second pass: one sk_read() per non-fast_rx socket with POLLIN,
                                   * plus sk_err() for POLLHUP/POLLERR. The walk resumes at stored_sock
                                   * (or at the list head when that is NULL), stops once MAX_RX_STEPS
                                   * reads were done, and saves where it stopped back to stored_sock.
                                   */
                   2215:          int count = 0;
                   2216:          current_sock = stored_sock;
                   2217:          if (current_sock == NULL)
                   2218:            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
                   2219: 
                   2220:          while (current_sock && count < MAX_RX_STEPS)
                   2221:            {
                   2222:              sock *s = current_sock;
                   2223:              if (s->index == -1)
                   2224:                {
                   2225:                  current_sock = sk_next(s);
                   2226:                  goto next2;
                   2227:                }
                   2228: 
                   2229:              if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
                   2230:                {
                   2231:                  count++;
                   2232:                  io_log_event(s->rx_hook, s->data);
                   2233:                  sk_read(s, pfd[s->index].revents);
                   2234:                  if (s != current_sock)
                   2235:                    goto next2;
                   2236:                }
                   2237: 
                   2238:              if (pfd[s->index].revents & (POLLHUP | POLLERR))
                   2239:                {
                   2240:                  sk_err(s, pfd[s->index].revents);
                   2241:                  if (s != current_sock)
                   2242:                    goto next2;
                   2243:                }
                   2244: 
                   2245:              current_sock = sk_next(s);
                   2246:            next2: ;
                   2247:            }
                   2248: 
                   2249: 
                   2250:          stored_sock = current_sock;
                   2251:        }
                   2252:     }
                   2253: }
                   2254: 
                         /*
                          * test_old_bird - check whether something is listening on a UNIX socket
                          * @path: filesystem path of the socket to probe
                          *
                          * Creates an AF_UNIX stream socket and tries to connect it to @path. A
                          * successful connect ends in die("I found another BIRD running."); any
                          * connect failure is treated as "nobody there" and the probe socket is
                          * closed. die() is also called when the socket cannot be created or when
                          * @path does not fit into sun_path.
                          */
                   2255: void
                   2256: test_old_bird(char *path)
                   2257: {
                   2258:   int fd;
                   2259:   struct sockaddr_un sa;
                   2260: 
                   2261:   fd = socket(AF_UNIX, SOCK_STREAM, 0);
                   2262:   if (fd < 0)
                   2263:     die("Cannot create socket: %m");
                           /* >= keeps one byte free for the terminating NUL copied by strcpy() below */
                   2264:   if (strlen(path) >= sizeof(sa.sun_path))
                   2265:     die("Socket path too long");
                   2266:   bzero(&sa, sizeof(sa));
                   2267:   sa.sun_family = AF_UNIX;
                   2268:   strcpy(sa.sun_path, path);
                           /* NOTE(review): fd is not closed on this path; assumes die() does not return -- confirm */
                   2269:   if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
                   2270:     die("I found another BIRD running.");
                   2271:   close(fd);
                   2272: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>