Annotation of embedaddon/bird2/sysdep/unix/io.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  *     BIRD Internet Routing Daemon -- Unix I/O
                      3:  *
                      4:  *     (c) 1998--2004 Martin Mares <mj@ucw.cz>
                      5:  *      (c) 2004       Ondrej Filip <feela@network.cz>
                      6:  *
                      7:  *     Can be freely distributed and used under the terms of the GNU GPL.
                      8:  */
                      9: 
                     10: /* Unfortunately, some glibc versions hide parts of RFC 3542 API
                     11:    if _GNU_SOURCE is not defined. */
                     12: #ifndef _GNU_SOURCE
                     13: #define _GNU_SOURCE
                     14: #endif
                     15: 
                     16: #include <stdio.h>
                     17: #include <stdlib.h>
                     18: #include <time.h>
                     19: #include <sys/time.h>
                     20: #include <sys/types.h>
                     21: #include <sys/socket.h>
                     22: #include <sys/uio.h>
                     23: #include <sys/un.h>
                     24: #include <poll.h>
                     25: #include <unistd.h>
                     26: #include <fcntl.h>
                     27: #include <errno.h>
                     28: #include <net/if.h>
                     29: #include <netinet/in.h>
                     30: #include <netinet/tcp.h>
                     31: #include <netinet/udp.h>
                     32: #include <netinet/icmp6.h>
                     33: 
                     34: #include "nest/bird.h"
                     35: #include "lib/lists.h"
                     36: #include "lib/resource.h"
                     37: #include "lib/socket.h"
                     38: #include "lib/event.h"
                     39: #include "lib/timer.h"
                     40: #include "lib/string.h"
                     41: #include "nest/iface.h"
                     42: #include "conf/conf.h"
                     43: 
                     44: #include "sysdep/unix/unix.h"
                     45: #include CONFIG_INCLUDE_SYSIO_H
                     46: 
                     47: /* Maximum number of calls of tx handler for one socket in one
                     48:  * poll iteration. Should be small enough to not monopolize CPU by
                     49:  * one protocol instance.
                     50:  */
                     51: #define MAX_STEPS 4
                     52: 
                     53: /* Maximum number of calls of rx handler for all sockets in one poll
                     54:    iteration. RX callbacks are often much more costly so we limit
                     55:    this to gen small latencies */
                     56: #define MAX_RX_STEPS 4
                     57: 
                     58: 
                     59: /*
                     60:  *     Tracked Files
                     61:  */
                     62: 
                     63: struct rfile {
                     64:   resource r;
                     65:   FILE *f;
                     66: };
                     67: 
                     68: static void
                     69: rf_free(resource *r)
                     70: {
                     71:   struct rfile *a = (struct rfile *) r;
                     72: 
                     73:   fclose(a->f);
                     74: }
                     75: 
                     76: static void
                     77: rf_dump(resource *r)
                     78: {
                     79:   struct rfile *a = (struct rfile *) r;
                     80: 
                     81:   debug("(FILE *%p)\n", a->f);
                     82: }
                     83: 
                     84: static struct resclass rf_class = {
                     85:   "FILE",
                     86:   sizeof(struct rfile),
                     87:   rf_free,
                     88:   rf_dump,
                     89:   NULL,
                     90:   NULL
                     91: };
                     92: 
                     93: struct rfile *
                     94: rf_open(pool *p, char *name, char *mode)
                     95: {
                     96:   FILE *f = fopen(name, mode);
                     97: 
                     98:   if (!f)
                     99:     return NULL;
                    100: 
                    101:   struct rfile *r = ralloc(p, &rf_class);
                    102:   r->f = f;
                    103:   return r;
                    104: }
                    105: 
                    106: void *
                    107: rf_file(struct rfile *f)
                    108: {
                    109:   return f->f;
                    110: }
                    111: 
                    112: int
                    113: rf_fileno(struct rfile *f)
                    114: {
                    115:   return fileno(f->f);
                    116: }
                    117: 
                    118: 
                    119: /*
                    120:  *     Time clock
                    121:  */
                    122: 
                    123: btime boot_time;
                    124: 
                    125: void
                    126: times_init(struct timeloop *loop)
                    127: {
                    128:   struct timespec ts;
                    129:   int rv;
                    130: 
                    131:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
                    132:   if (rv < 0)
                    133:     die("Monotonic clock is missing");
                    134: 
                    135:   if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40)))
                    136:     log(L_WARN "Monotonic clock is crazy");
                    137: 
                    138:   loop->last_time = ts.tv_sec S + ts.tv_nsec NS;
                    139:   loop->real_time = 0;
                    140: }
                    141: 
                    142: void
                    143: times_update(struct timeloop *loop)
                    144: {
                    145:   struct timespec ts;
                    146:   int rv;
                    147: 
                    148:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
                    149:   if (rv < 0)
                    150:     die("clock_gettime: %m");
                    151: 
                    152:   btime new_time = ts.tv_sec S + ts.tv_nsec NS;
                    153: 
                    154:   if (new_time < loop->last_time)
                    155:     log(L_ERR "Monotonic clock is broken");
                    156: 
                    157:   loop->last_time = new_time;
                    158:   loop->real_time = 0;
                    159: }
                    160: 
                    161: void
                    162: times_update_real_time(struct timeloop *loop)
                    163: {
                    164:   struct timespec ts;
                    165:   int rv;
                    166: 
                    167:   rv = clock_gettime(CLOCK_REALTIME, &ts);
                    168:   if (rv < 0)
                    169:     die("clock_gettime: %m");
                    170: 
                    171:   loop->real_time = ts.tv_sec S + ts.tv_nsec NS;
                    172: }
                    173: 
                    174: 
                    175: /**
                    176:  * DOC: Sockets
                    177:  *
                    178:  * Socket resources represent network connections. Their data structure (&socket)
                    179:  * contains a lot of fields defining the exact type of the socket, the local and
                    180:  * remote addresses and ports, pointers to socket buffers and finally pointers to
                    181:  * hook functions to be called when new data have arrived to the receive buffer
                    182:  * (@rx_hook), when the contents of the transmit buffer have been transmitted
                    183:  * (@tx_hook) and when an error or connection close occurs (@err_hook).
                    184:  *
                    185:  * Freeing of sockets from inside socket hooks is perfectly safe.
                    186:  */
                    187: 
                    188: #ifndef SOL_IP
                    189: #define SOL_IP IPPROTO_IP
                    190: #endif
                    191: 
                    192: #ifndef SOL_IPV6
                    193: #define SOL_IPV6 IPPROTO_IPV6
                    194: #endif
                    195: 
                    196: #ifndef SOL_ICMPV6
                    197: #define SOL_ICMPV6 IPPROTO_ICMPV6
                    198: #endif
                    199: 
                    200: 
                    201: /*
                    202:  *     Sockaddr helper functions
                    203:  */
                    204: 
                    205: static inline int UNUSED sockaddr_length(int af)
                    206: { return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
                    207: 
                    208: static inline void
                    209: sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
                    210: {
                    211:   memset(sa, 0, sizeof(struct sockaddr_in));
                    212: #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
                    213:   sa->sin_len = sizeof(struct sockaddr_in);
                    214: #endif
                    215:   sa->sin_family = AF_INET;
                    216:   sa->sin_port = htons(port);
                    217:   sa->sin_addr = ipa_to_in4(a);
                    218: }
                    219: 
                    220: static inline void
                    221: sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
                    222: {
                    223:   memset(sa, 0, sizeof(struct sockaddr_in6));
                    224: #ifdef SIN6_LEN
                    225:   sa->sin6_len = sizeof(struct sockaddr_in6);
                    226: #endif
                    227:   sa->sin6_family = AF_INET6;
                    228:   sa->sin6_port = htons(port);
                    229:   sa->sin6_flowinfo = 0;
                    230:   sa->sin6_addr = ipa_to_in6(a);
                    231: 
                    232:   if (ifa && ipa_is_link_local(a))
                    233:     sa->sin6_scope_id = ifa->index;
                    234: }
                    235: 
                    236: void
                    237: sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
                    238: {
                    239:   if (af == AF_INET)
                    240:     sockaddr_fill4((struct sockaddr_in *) sa, a, port);
                    241:   else if (af == AF_INET6)
                    242:     sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
                    243:   else
                    244:     bug("Unknown AF");
                    245: }
                    246: 
                    247: static inline void
                    248: sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
                    249: {
                    250:   *port = ntohs(sa->sin_port);
                    251:   *a = ipa_from_in4(sa->sin_addr);
                    252: }
                    253: 
                    254: static inline void
                    255: sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
                    256: {
                    257:   *port = ntohs(sa->sin6_port);
                    258:   *a = ipa_from_in6(sa->sin6_addr);
                    259: 
                    260:   if (ifa && ipa_is_link_local(*a))
                    261:     *ifa = if_find_by_index(sa->sin6_scope_id);
                    262: }
                    263: 
                    264: int
                    265: sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
                    266: {
                    267:   if (sa->sa.sa_family != af)
                    268:     goto fail;
                    269: 
                    270:   if (af == AF_INET)
                    271:     sockaddr_read4((struct sockaddr_in *) sa, a, port);
                    272:   else if (af == AF_INET6)
                    273:     sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
                    274:   else
                    275:     goto fail;
                    276: 
                    277:   return 0;
                    278: 
                    279:  fail:
                    280:   *a = IPA_NONE;
                    281:   *port = 0;
                    282:   return -1;
                    283: }
                    284: 
                    285: 
                    286: /*
                    287:  *     IPv6 multicast syscalls
                    288:  */
                    289: 
                    290: /* Fortunately standardized in RFC 3493 */
                    291: 
                    292: #define INIT_MREQ6(maddr,ifa) \
                    293:   { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
                    294: 
                    295: static inline int
                    296: sk_setup_multicast6(sock *s)
                    297: {
                    298:   int index = s->iface->index;
                    299:   int ttl = s->ttl;
                    300:   int n = 0;
                    301: 
                    302:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
                    303:     ERR("IPV6_MULTICAST_IF");
                    304: 
                    305:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
                    306:     ERR("IPV6_MULTICAST_HOPS");
                    307: 
                    308:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
                    309:     ERR("IPV6_MULTICAST_LOOP");
                    310: 
                    311:   return 0;
                    312: }
                    313: 
                    314: static inline int
                    315: sk_join_group6(sock *s, ip_addr maddr)
                    316: {
                    317:   struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
                    318: 
                    319:   if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
                    320:     ERR("IPV6_JOIN_GROUP");
                    321: 
                    322:   return 0;
                    323: }
                    324: 
                    325: static inline int
                    326: sk_leave_group6(sock *s, ip_addr maddr)
                    327: {
                    328:   struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
                    329: 
                    330:   if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
                    331:     ERR("IPV6_LEAVE_GROUP");
                    332: 
                    333:   return 0;
                    334: }
                    335: 
                    336: 
                    337: /*
                    338:  *     IPv6 packet control messages
                    339:  */
                    340: 
                    341: /* Also standardized, in RFC 3542 */
                    342: 
                    343: /*
                    344:  * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
                    345:  * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
                    346:  * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
                    347:  * RFC and we use IPV6_PKTINFO.
                    348:  */
                    349: #ifndef IPV6_RECVPKTINFO
                    350: #define IPV6_RECVPKTINFO IPV6_PKTINFO
                    351: #endif
                    352: /*
                    353:  * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
                    354:  */
                    355: #ifndef IPV6_RECVHOPLIMIT
                    356: #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
                    357: #endif
                    358: 
                    359: 
                    360: #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
                    361: #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
                    362: 
                    363: static inline int
                    364: sk_request_cmsg6_pktinfo(sock *s)
                    365: {
                    366:   int y = 1;
                    367: 
                    368:   if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
                    369:     ERR("IPV6_RECVPKTINFO");
                    370: 
                    371:   return 0;
                    372: }
                    373: 
                    374: static inline int
                    375: sk_request_cmsg6_ttl(sock *s)
                    376: {
                    377:   int y = 1;
                    378: 
                    379:   if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
                    380:     ERR("IPV6_RECVHOPLIMIT");
                    381: 
                    382:   return 0;
                    383: }
                    384: 
                    385: static inline void
                    386: sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
                    387: {
                    388:   if (cm->cmsg_type == IPV6_PKTINFO)
                    389:   {
                    390:     struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
                    391:     s->laddr = ipa_from_in6(pi->ipi6_addr);
                    392:     s->lifindex = pi->ipi6_ifindex;
                    393:   }
                    394: }
                    395: 
                    396: static inline void
                    397: sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
                    398: {
                    399:   if (cm->cmsg_type == IPV6_HOPLIMIT)
                    400:     s->rcv_ttl = * (int *) CMSG_DATA(cm);
                    401: }
                    402: 
                    403: static inline void
                    404: sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
                    405: {
                    406:   struct cmsghdr *cm;
                    407:   struct in6_pktinfo *pi;
                    408:   int controllen = 0;
                    409: 
                    410:   msg->msg_control = cbuf;
                    411:   msg->msg_controllen = cbuflen;
                    412: 
                    413:   cm = CMSG_FIRSTHDR(msg);
                    414:   cm->cmsg_level = SOL_IPV6;
                    415:   cm->cmsg_type = IPV6_PKTINFO;
                    416:   cm->cmsg_len = CMSG_LEN(sizeof(*pi));
                    417:   controllen += CMSG_SPACE(sizeof(*pi));
                    418: 
                    419:   pi = (struct in6_pktinfo *) CMSG_DATA(cm);
                    420:   pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
                    421:   pi->ipi6_addr = ipa_to_in6(s->saddr);
                    422: 
                    423:   msg->msg_controllen = controllen;
                    424: }
                    425: 
                    426: 
                    427: /*
                    428:  *     Miscellaneous socket syscalls
                    429:  */
                    430: 
                    431: static inline int
                    432: sk_set_ttl4(sock *s, int ttl)
                    433: {
                    434:   if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
                    435:     ERR("IP_TTL");
                    436: 
                    437:   return 0;
                    438: }
                    439: 
                    440: static inline int
                    441: sk_set_ttl6(sock *s, int ttl)
                    442: {
                    443:   if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
                    444:     ERR("IPV6_UNICAST_HOPS");
                    445: 
                    446:   return 0;
                    447: }
                    448: 
                    449: static inline int
                    450: sk_set_tos4(sock *s, int tos)
                    451: {
                    452:   if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
                    453:     ERR("IP_TOS");
                    454: 
                    455:   return 0;
                    456: }
                    457: 
                    458: static inline int
                    459: sk_set_tos6(sock *s, int tos)
                    460: {
                    461:   if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
                    462:     ERR("IPV6_TCLASS");
                    463: 
                    464:   return 0;
                    465: }
                    466: 
                    467: static inline int
                    468: sk_set_high_port(sock *s UNUSED)
                    469: {
                    470:   /* Port range setting is optional, ignore it if not supported */
                    471: 
                    472: #ifdef IP_PORTRANGE
                    473:   if (sk_is_ipv4(s))
                    474:   {
                    475:     int range = IP_PORTRANGE_HIGH;
                    476:     if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
                    477:       ERR("IP_PORTRANGE");
                    478:   }
                    479: #endif
                    480: 
                    481: #ifdef IPV6_PORTRANGE
                    482:   if (sk_is_ipv6(s))
                    483:   {
                    484:     int range = IPV6_PORTRANGE_HIGH;
                    485:     if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
                    486:       ERR("IPV6_PORTRANGE");
                    487:   }
                    488: #endif
                    489: 
                    490:   return 0;
                    491: }
                    492: 
                    493: static inline byte *
                    494: sk_skip_ip_header(byte *pkt, int *len)
                    495: {
                    496:   if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
                    497:     return NULL;
                    498: 
                    499:   int hlen = (*pkt & 0x0f) * 4;
                    500:   if ((hlen < 20) || (hlen > *len))
                    501:     return NULL;
                    502: 
                    503:   *len -= hlen;
                    504:   return pkt + hlen;
                    505: }
                    506: 
                    507: byte *
                    508: sk_rx_buffer(sock *s, int *len)
                    509: {
                    510:   if (sk_is_ipv4(s) && (s->type == SK_IP))
                    511:     return sk_skip_ip_header(s->rbuf, len);
                    512:   else
                    513:     return s->rbuf;
                    514: }
                    515: 
                    516: 
                    517: /*
                    518:  *     Public socket functions
                    519:  */
                    520: 
                    521: /**
                    522:  * sk_setup_multicast - enable multicast for given socket
                    523:  * @s: socket
                    524:  *
                    525:  * Prepare transmission of multicast packets for given datagram socket.
                    526:  * The socket must have defined @iface.
                    527:  *
                    528:  * Result: 0 for success, -1 for an error.
                    529:  */
                    530: 
                    531: int
                    532: sk_setup_multicast(sock *s)
                    533: {
                    534:   ASSERT(s->iface);
                    535: 
                    536:   if (sk_is_ipv4(s))
                    537:     return sk_setup_multicast4(s);
                    538:   else
                    539:     return sk_setup_multicast6(s);
                    540: }
                    541: 
                    542: /**
                    543:  * sk_join_group - join multicast group for given socket
                    544:  * @s: socket
                    545:  * @maddr: multicast address
                    546:  *
                    547:  * Join multicast group for given datagram socket and associated interface.
                    548:  * The socket must have defined @iface.
                    549:  *
                    550:  * Result: 0 for success, -1 for an error.
                    551:  */
                    552: 
                    553: int
                    554: sk_join_group(sock *s, ip_addr maddr)
                    555: {
                    556:   if (sk_is_ipv4(s))
                    557:     return sk_join_group4(s, maddr);
                    558:   else
                    559:     return sk_join_group6(s, maddr);
                    560: }
                    561: 
                    562: /**
                    563:  * sk_leave_group - leave multicast group for given socket
                    564:  * @s: socket
                    565:  * @maddr: multicast address
                    566:  *
                    567:  * Leave multicast group for given datagram socket and associated interface.
                    568:  * The socket must have defined @iface.
                    569:  *
                    570:  * Result: 0 for success, -1 for an error.
                    571:  */
                    572: 
                    573: int
                    574: sk_leave_group(sock *s, ip_addr maddr)
                    575: {
                    576:   if (sk_is_ipv4(s))
                    577:     return sk_leave_group4(s, maddr);
                    578:   else
                    579:     return sk_leave_group6(s, maddr);
                    580: }
                    581: 
                    582: /**
                    583:  * sk_setup_broadcast - enable broadcast for given socket
                    584:  * @s: socket
                    585:  *
                    586:  * Allow reception and transmission of broadcast packets for given datagram
                    587:  * socket. The socket must have defined @iface. For transmission, packets should
                    588:  * be send to @brd address of @iface.
                    589:  *
                    590:  * Result: 0 for success, -1 for an error.
                    591:  */
                    592: 
                    593: int
                    594: sk_setup_broadcast(sock *s)
                    595: {
                    596:   int y = 1;
                    597: 
                    598:   if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
                    599:     ERR("SO_BROADCAST");
                    600: 
                    601:   return 0;
                    602: }
                    603: 
                    604: /**
                    605:  * sk_set_ttl - set transmit TTL for given socket
                    606:  * @s: socket
                    607:  * @ttl: TTL value
                    608:  *
                    609:  * Set TTL for already opened connections when TTL was not set before. Useful
                    610:  * for accepted connections when different ones should have different TTL.
                    611:  *
                    612:  * Result: 0 for success, -1 for an error.
                    613:  */
                    614: 
                    615: int
                    616: sk_set_ttl(sock *s, int ttl)
                    617: {
                    618:   s->ttl = ttl;
                    619: 
                    620:   if (sk_is_ipv4(s))
                    621:     return sk_set_ttl4(s, ttl);
                    622:   else
                    623:     return sk_set_ttl6(s, ttl);
                    624: }
                    625: 
                    626: /**
                    627:  * sk_set_min_ttl - set minimal accepted TTL for given socket
                    628:  * @s: socket
                    629:  * @ttl: TTL value
                    630:  *
                    631:  * Set minimal accepted TTL for given socket. Can be used for TTL security.
                    632:  * implementations.
                    633:  *
                    634:  * Result: 0 for success, -1 for an error.
                    635:  */
                    636: 
                    637: int
                    638: sk_set_min_ttl(sock *s, int ttl)
                    639: {
                    640:   if (sk_is_ipv4(s))
                    641:     return sk_set_min_ttl4(s, ttl);
                    642:   else
                    643:     return sk_set_min_ttl6(s, ttl);
                    644: }
                    645: 
                    646: #if 0
                    647: /**
                    648:  * sk_set_md5_auth - add / remove MD5 security association for given socket
                    649:  * @s: socket
                    650:  * @local: IP address of local side
                    651:  * @remote: IP address of remote side
                    652:  * @ifa: Interface for link-local IP address
                    653:  * @passwd: Password used for MD5 authentication
                    654:  * @setkey: Update also system SA/SP database
                    655:  *
                    656:  * In TCP MD5 handling code in kernel, there is a set of security associations
                    657:  * used for choosing password and other authentication parameters according to
                    658:  * the local and remote address. This function is useful for listening socket,
                    659:  * for active sockets it may be enough to set s->password field.
                    660:  *
                    661:  * When called with passwd != NULL, the new pair is added,
                    662:  * When called with passwd == NULL, the existing pair is removed.
                    663:  *
                    664:  * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
                    665:  * stored in global SA/SP database (but the behavior also must be enabled on
                    666:  * per-socket basis). In case of multiple sockets to the same neighbor, the
                    667:  * socket-specific state must be configured for each socket while global state
                    668:  * just once per src-dst pair. The @setkey argument controls whether the global
                    669:  * state (SA/SP database) is also updated.
                    670:  *
                    671:  * Result: 0 for success, -1 for an error.
                    672:  */
                    673: 
                    674: int
                    675: sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
                    676: { DUMMY; }
                    677: #endif
                    678: 
                    679: /**
                    680:  * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
                    681:  * @s: socket
                    682:  * @offset: offset
                    683:  *
                    684:  * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
                    685:  * kernel will automatically fill it for outgoing packets and check it for
                    686:  * incoming packets. Should not be used on ICMPv6 sockets, where the position is
                    687:  * known to the kernel.
                    688:  *
                    689:  * Result: 0 for success, -1 for an error.
                    690:  */
                    691: 
                    692: int
                    693: sk_set_ipv6_checksum(sock *s, int offset)
                    694: {
                    695:   if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
                    696:     ERR("IPV6_CHECKSUM");
                    697: 
                    698:   return 0;
                    699: }
                    700: 
                    701: int
                    702: sk_set_icmp6_filter(sock *s, int p1, int p2)
                    703: {
                    704:   /* a bit of lame interface, but it is here only for Radv */
                    705:   struct icmp6_filter f;
                    706: 
                    707:   ICMP6_FILTER_SETBLOCKALL(&f);
                    708:   ICMP6_FILTER_SETPASS(p1, &f);
                    709:   ICMP6_FILTER_SETPASS(p2, &f);
                    710: 
                    711:   if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
                    712:     ERR("ICMP6_FILTER");
                    713: 
                    714:   return 0;
                    715: }
                    716: 
                    717: void
                    718: sk_log_error(sock *s, const char *p)
                    719: {
                    720:   log(L_ERR "%s: Socket error: %s%#m", p, s->err);
                    721: }
                    722: 
                    723: 
                    724: /*
                    725:  *     Actual struct birdsock code
                    726:  */
                    727: 
                    728: static list sock_list;
                    729: static struct birdsock *current_sock;
                    730: static struct birdsock *stored_sock;
                    731: 
                    732: static inline sock *
                    733: sk_next(sock *s)
                    734: {
                    735:   if (!s->n.next->next)
                    736:     return NULL;
                    737:   else
                    738:     return SKIP_BACK(sock, n, s->n.next);
                    739: }
                    740: 
                    741: static void
                    742: sk_alloc_bufs(sock *s)
                    743: {
                    744:   if (!s->rbuf && s->rbsize)
                    745:     s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
                    746:   s->rpos = s->rbuf;
                    747:   if (!s->tbuf && s->tbsize)
                    748:     s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
                    749:   s->tpos = s->ttx = s->tbuf;
                    750: }
                    751: 
                    752: static void
                    753: sk_free_bufs(sock *s)
                    754: {
                    755:   if (s->rbuf_alloc)
                    756:   {
                    757:     xfree(s->rbuf_alloc);
                    758:     s->rbuf = s->rbuf_alloc = NULL;
                    759:   }
                    760:   if (s->tbuf_alloc)
                    761:   {
                    762:     xfree(s->tbuf_alloc);
                    763:     s->tbuf = s->tbuf_alloc = NULL;
                    764:   }
                    765: }
                    766: 
                    767: #ifdef HAVE_LIBSSH
                    768: static void
                    769: sk_ssh_free(sock *s)
                    770: {
                    771:   struct ssh_sock *ssh = s->ssh;
                    772: 
                    773:   if (s->ssh == NULL)
                    774:     return;
                    775: 
                    776:   s->ssh = NULL;
                    777: 
                    778:   if (ssh->channel)
                    779:   {
                    780:     if (ssh_channel_is_open(ssh->channel))
                    781:       ssh_channel_close(ssh->channel);
                    782:     ssh_channel_free(ssh->channel);
                    783:     ssh->channel = NULL;
                    784:   }
                    785: 
                    786:   if (ssh->session)
                    787:   {
                    788:     ssh_disconnect(ssh->session);
                    789:     ssh_free(ssh->session);
                    790:     ssh->session = NULL;
                    791:   }
                    792: }
                    793: #endif
                    794: 
                    795: static void
                    796: sk_free(resource *r)
                    797: {
                    798:   sock *s = (sock *) r;
                    799: 
                    800:   sk_free_bufs(s);
                    801: 
                    802: #ifdef HAVE_LIBSSH
                    803:   if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
                    804:     sk_ssh_free(s);
                    805: #endif
                    806: 
                    807:   if (s->fd < 0)
                    808:     return;
                    809: 
                    810:   /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
                    811:   if (!(s->flags & SKF_THREAD))
                    812:   {
                    813:     if (s == current_sock)
                    814:       current_sock = sk_next(s);
                    815:     if (s == stored_sock)
                    816:       stored_sock = sk_next(s);
                    817:     rem_node(&s->n);
                    818:   }
                    819: 
                    820:   if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
                    821:     close(s->fd);
                    822: 
                    823:   s->fd = -1;
                    824: }
                    825: 
                    826: void
                    827: sk_set_rbsize(sock *s, uint val)
                    828: {
                    829:   ASSERT(s->rbuf_alloc == s->rbuf);
                    830: 
                    831:   if (s->rbsize == val)
                    832:     return;
                    833: 
                    834:   s->rbsize = val;
                    835:   xfree(s->rbuf_alloc);
                    836:   s->rbuf_alloc = xmalloc(val);
                    837:   s->rpos = s->rbuf = s->rbuf_alloc;
                    838: }
                    839: 
                    840: void
                    841: sk_set_tbsize(sock *s, uint val)
                    842: {
                    843:   ASSERT(s->tbuf_alloc == s->tbuf);
                    844: 
                    845:   if (s->tbsize == val)
                    846:     return;
                    847: 
                    848:   byte *old_tbuf = s->tbuf;
                    849: 
                    850:   s->tbsize = val;
                    851:   s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
                    852:   s->tpos = s->tbuf + (s->tpos - old_tbuf);
                    853:   s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
                    854: }
                    855: 
                    856: void
                    857: sk_set_tbuf(sock *s, void *tbuf)
                    858: {
                    859:   s->tbuf = tbuf ?: s->tbuf_alloc;
                    860:   s->ttx = s->tpos = s->tbuf;
                    861: }
                    862: 
                    863: void
                    864: sk_reallocate(sock *s)
                    865: {
                    866:   sk_free_bufs(s);
                    867:   sk_alloc_bufs(s);
                    868: }
                    869: 
                    870: static void
                    871: sk_dump(resource *r)
                    872: {
                    873:   sock *s = (sock *) r;
                    874:   static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
                    875: 
                    876:   debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
                    877:        sk_type_names[s->type],
                    878:        s->data,
                    879:        s->saddr,
                    880:        s->sport,
                    881:        s->daddr,
                    882:        s->dport,
                    883:        s->tos,
                    884:        s->ttl,
                    885:        s->iface ? s->iface->name : "none");
                    886: }
                    887: 
                    888: static struct resclass sk_class = {
                    889:   "Socket",
                    890:   sizeof(sock),
                    891:   sk_free,
                    892:   sk_dump,
                    893:   NULL,
                    894:   NULL
                    895: };
                    896: 
                    897: /**
                    898:  * sk_new - create a socket
                    899:  * @p: pool
                    900:  *
                    901:  * This function creates a new socket resource. If you want to use it,
                    902:  * you need to fill in all the required fields of the structure and
                    903:  * call sk_open() to do the actual opening of the socket.
                    904:  *
                    905:  * The real function name is sock_new(), sk_new() is a macro wrapper
                    906:  * to avoid collision with OpenSSL.
                    907:  */
                    908: sock *
                    909: sock_new(pool *p)
                    910: {
                    911:   sock *s = ralloc(p, &sk_class);
                    912:   s->pool = p;
                    913:   // s->saddr = s->daddr = IPA_NONE;
                    914:   s->tos = s->priority = s->ttl = -1;
                    915:   s->fd = -1;
                    916:   return s;
                    917: }
                    918: 
                    919: static int
                    920: sk_setup(sock *s)
                    921: {
                    922:   int y = 1;
                    923:   int fd = s->fd;
                    924: 
                    925:   if (s->type == SK_SSH_ACTIVE)
                    926:     return 0;
                    927: 
                    928:   if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
                    929:     ERR("O_NONBLOCK");
                    930: 
                    931:   if (!s->af)
                    932:     return 0;
                    933: 
                    934:   if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
                    935:     s->flags |= SKF_PKTINFO;
                    936: 
                    937: #ifdef CONFIG_USE_HDRINCL
                    938:   if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
                    939:   {
                    940:     s->flags &= ~SKF_PKTINFO;
                    941:     s->flags |= SKF_HDRINCL;
                    942:     if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
                    943:       ERR("IP_HDRINCL");
                    944:   }
                    945: #endif
                    946: 
                    947:   if (s->vrf && !s->iface)
                    948:   {
                    949:     /* Bind socket to associated VRF interface.
                    950:        This is Linux-specific, but so is SO_BINDTODEVICE. */
                    951: #ifdef SO_BINDTODEVICE
                    952:     struct ifreq ifr = {};
                    953:     strcpy(ifr.ifr_name, s->vrf->name);
                    954:     if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
                    955:       ERR("SO_BINDTODEVICE");
                    956: #endif
                    957:   }
                    958: 
                    959:   if (s->iface)
                    960:   {
                    961: #ifdef SO_BINDTODEVICE
                    962:     struct ifreq ifr = {};
                    963:     strcpy(ifr.ifr_name, s->iface->name);
                    964:     if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
                    965:       ERR("SO_BINDTODEVICE");
                    966: #endif
                    967: 
                    968: #ifdef CONFIG_UNIX_DONTROUTE
                    969:     if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
                    970:       ERR("SO_DONTROUTE");
                    971: #endif
                    972:   }
                    973: 
                    974:   if (sk_is_ipv4(s))
                    975:   {
                    976:     if (s->flags & SKF_LADDR_RX)
                    977:       if (sk_request_cmsg4_pktinfo(s) < 0)
                    978:        return -1;
                    979: 
                    980:     if (s->flags & SKF_TTL_RX)
                    981:       if (sk_request_cmsg4_ttl(s) < 0)
                    982:        return -1;
                    983: 
                    984:     if ((s->type == SK_UDP) || (s->type == SK_IP))
                    985:       if (sk_disable_mtu_disc4(s) < 0)
                    986:        return -1;
                    987: 
                    988:     if (s->ttl >= 0)
                    989:       if (sk_set_ttl4(s, s->ttl) < 0)
                    990:        return -1;
                    991: 
                    992:     if (s->tos >= 0)
                    993:       if (sk_set_tos4(s, s->tos) < 0)
                    994:        return -1;
                    995:   }
                    996: 
                    997:   if (sk_is_ipv6(s))
                    998:   {
                    999:     if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
                   1000:       if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
                   1001:        ERR("IPV6_V6ONLY");
                   1002: 
                   1003:     if (s->flags & SKF_LADDR_RX)
                   1004:       if (sk_request_cmsg6_pktinfo(s) < 0)
                   1005:        return -1;
                   1006: 
                   1007:     if (s->flags & SKF_TTL_RX)
                   1008:       if (sk_request_cmsg6_ttl(s) < 0)
                   1009:        return -1;
                   1010: 
                   1011:     if ((s->type == SK_UDP) || (s->type == SK_IP))
                   1012:       if (sk_disable_mtu_disc6(s) < 0)
                   1013:        return -1;
                   1014: 
                   1015:     if (s->ttl >= 0)
                   1016:       if (sk_set_ttl6(s, s->ttl) < 0)
                   1017:        return -1;
                   1018: 
                   1019:     if (s->tos >= 0)
                   1020:       if (sk_set_tos6(s, s->tos) < 0)
                   1021:        return -1;
                   1022:   }
                   1023: 
                   1024:   /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
                   1025:   if (s->priority >= 0)
                   1026:     if (sk_set_priority(s, s->priority) < 0)
                   1027:       return -1;
                   1028: 
                   1029:   return 0;
                   1030: }
                   1031: 
                   1032: static void
                   1033: sk_insert(sock *s)
                   1034: {
                   1035:   add_tail(&sock_list, &s->n);
                   1036: }
                   1037: 
                   1038: static void
                   1039: sk_tcp_connected(sock *s)
                   1040: {
                   1041:   sockaddr sa;
                   1042:   int sa_len = sizeof(sa);
                   1043: 
                   1044:   if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
                   1045:       (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
                   1046:     log(L_WARN "SOCK: Cannot get local IP address for TCP>");
                   1047: 
                   1048:   s->type = SK_TCP;
                   1049:   sk_alloc_bufs(s);
                   1050:   s->tx_hook(s);
                   1051: }
                   1052: 
                   1053: #ifdef HAVE_LIBSSH
                   1054: static void
                   1055: sk_ssh_connected(sock *s)
                   1056: {
                   1057:   sk_alloc_bufs(s);
                   1058:   s->type = SK_SSH;
                   1059:   s->tx_hook(s);
                   1060: }
                   1061: #endif
                   1062: 
                   1063: static int
                   1064: sk_passive_connected(sock *s, int type)
                   1065: {
                   1066:   sockaddr loc_sa, rem_sa;
                   1067:   int loc_sa_len = sizeof(loc_sa);
                   1068:   int rem_sa_len = sizeof(rem_sa);
                   1069: 
                   1070:   int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
                   1071:   if (fd < 0)
                   1072:   {
                   1073:     if ((errno != EINTR) && (errno != EAGAIN))
                   1074:       s->err_hook(s, errno);
                   1075:     return 0;
                   1076:   }
                   1077: 
                   1078:   sock *t = sk_new(s->pool);
                   1079:   t->type = type;
                   1080:   t->data = s->data;
                   1081:   t->af = s->af;
                   1082:   t->fd = fd;
                   1083:   t->ttl = s->ttl;
                   1084:   t->tos = s->tos;
                   1085:   t->vrf = s->vrf;
                   1086:   t->rbsize = s->rbsize;
                   1087:   t->tbsize = s->tbsize;
                   1088: 
                   1089:   if (type == SK_TCP)
                   1090:   {
                   1091:     if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
                   1092:        (sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
                   1093:       log(L_WARN "SOCK: Cannot get local IP address for TCP<");
                   1094: 
                   1095:     if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
                   1096:       log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
                   1097:   }
                   1098: 
                   1099:   if (sk_setup(t) < 0)
                   1100:   {
                   1101:     /* FIXME: Call err_hook instead ? */
                   1102:     log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
                   1103: 
                   1104:     /* FIXME: handle it better in rfree() */
                   1105:     close(t->fd);
                   1106:     t->fd = -1;
                   1107:     rfree(t);
                   1108:     return 1;
                   1109:   }
                   1110: 
                   1111:   sk_insert(t);
                   1112:   sk_alloc_bufs(t);
                   1113:   s->rx_hook(t, 0);
                   1114:   return 1;
                   1115: }
                   1116: 
                   1117: #ifdef HAVE_LIBSSH
                   1118: /*
                   1119:  * Return SSH_OK or SSH_AGAIN or SSH_ERROR
                   1120:  */
                   1121: static int
                   1122: sk_ssh_connect(sock *s)
                   1123: {
                   1124:   s->fd = ssh_get_fd(s->ssh->session);
                   1125: 
                   1126:   /* Big fall thru automata */
                   1127:   switch (s->ssh->state)
                   1128:   {
                   1129:   case SK_SSH_CONNECT:
                   1130:   {
                   1131:     switch (ssh_connect(s->ssh->session))
                   1132:     {
                   1133:     case SSH_AGAIN:
                   1134:       /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
                   1135:        * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
                   1136:        * documented but our code relies on that.
                   1137:        */
                   1138:       return SSH_AGAIN;
                   1139: 
                   1140:     case SSH_OK:
                   1141:       break;
                   1142: 
                   1143:     default:
                   1144:       return SSH_ERROR;
                   1145:     }
                   1146:   } /* fallthrough */
                   1147: 
                   1148:   case SK_SSH_SERVER_KNOWN:
                   1149:   {
                   1150:     s->ssh->state = SK_SSH_SERVER_KNOWN;
                   1151: 
                   1152:     if (s->ssh->server_hostkey_path)
                   1153:     {
                   1154:       int server_identity_is_ok = 1;
                   1155: 
                   1156:       /* Check server identity */
                   1157:       switch (ssh_is_server_known(s->ssh->session))
                   1158:       {
                   1159: #define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
                   1160:       case SSH_SERVER_KNOWN_OK:
                   1161:        /* The server is known and has not changed. */
                   1162:        break;
                   1163: 
                   1164:       case SSH_SERVER_NOT_KNOWN:
                   1165:        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
                   1166:        break;
                   1167: 
                   1168:       case SSH_SERVER_KNOWN_CHANGED:
                   1169:        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
                   1170:        server_identity_is_ok = 0;
                   1171:        break;
                   1172: 
                   1173:       case SSH_SERVER_FILE_NOT_FOUND:
                   1174:        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
                   1175:        server_identity_is_ok = 0;
                   1176:        break;
                   1177: 
                   1178:       case SSH_SERVER_ERROR:
                   1179:        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
                   1180:        server_identity_is_ok = 0;
                   1181:        break;
                   1182: 
                   1183:       case SSH_SERVER_FOUND_OTHER:
                   1184:        LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
                   1185:                                             "It is a possible attack.");
                   1186:        server_identity_is_ok = 0;
                   1187:        break;
                   1188:       }
                   1189: 
                   1190:       if (!server_identity_is_ok)
                   1191:        return SSH_ERROR;
                   1192:     }
                   1193:   } /* fallthrough */
                   1194: 
                   1195:   case SK_SSH_USERAUTH:
                   1196:   {
                   1197:     s->ssh->state = SK_SSH_USERAUTH;
                   1198:     switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
                   1199:     {
                   1200:     case SSH_AUTH_AGAIN:
                   1201:       return SSH_AGAIN;
                   1202: 
                   1203:     case SSH_AUTH_SUCCESS:
                   1204:       break;
                   1205: 
                   1206:     default:
                   1207:       return SSH_ERROR;
                   1208:     }
                   1209:   } /* fallthrough */
                   1210: 
                   1211:   case SK_SSH_CHANNEL:
                   1212:   {
                   1213:     s->ssh->state = SK_SSH_CHANNEL;
                   1214:     s->ssh->channel = ssh_channel_new(s->ssh->session);
                   1215:     if (s->ssh->channel == NULL)
                   1216:       return SSH_ERROR;
                   1217:   } /* fallthrough */
                   1218: 
                   1219:   case SK_SSH_SESSION:
                   1220:   {
                   1221:     s->ssh->state = SK_SSH_SESSION;
                   1222:     switch (ssh_channel_open_session(s->ssh->channel))
                   1223:     {
                   1224:     case SSH_AGAIN:
                   1225:       return SSH_AGAIN;
                   1226: 
                   1227:     case SSH_OK:
                   1228:       break;
                   1229: 
                   1230:     default:
                   1231:       return SSH_ERROR;
                   1232:     }
                   1233:   } /* fallthrough */
                   1234: 
                   1235:   case SK_SSH_SUBSYSTEM:
                   1236:   {
                   1237:     s->ssh->state = SK_SSH_SUBSYSTEM;
                   1238:     if (s->ssh->subsystem)
                   1239:     {
                   1240:       switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
                   1241:       {
                   1242:       case SSH_AGAIN:
                   1243:        return SSH_AGAIN;
                   1244: 
                   1245:       case SSH_OK:
                   1246:        break;
                   1247: 
                   1248:       default:
                   1249:        return SSH_ERROR;
                   1250:       }
                   1251:     }
                   1252:   } /* fallthrough */
                   1253: 
                   1254:   case SK_SSH_ESTABLISHED:
                   1255:     s->ssh->state = SK_SSH_ESTABLISHED;
                   1256:   }
                   1257: 
                   1258:   return SSH_OK;
                   1259: }
                   1260: 
                   1261: /*
                   1262:  * Return file descriptor number if success
                   1263:  * Return -1 if failed
                   1264:  */
                   1265: static int
                   1266: sk_open_ssh(sock *s)
                   1267: {
                   1268:   if (!s->ssh)
                   1269:     bug("sk_open() sock->ssh is not allocated");
                   1270: 
                   1271:   ssh_session sess = ssh_new();
                   1272:   if (sess == NULL)
                   1273:     ERR2("Cannot create a ssh session");
                   1274:   s->ssh->session = sess;
                   1275: 
                   1276:   const int verbosity = SSH_LOG_NOLOG;
                   1277:   ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
                   1278:   ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
                   1279:   ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
                   1280:   /* TODO: Add SSH_OPTIONS_BINDADDR */
                   1281:   ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
                   1282: 
                   1283:   if (s->ssh->server_hostkey_path)
                   1284:     ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
                   1285: 
                   1286:   if (s->ssh->client_privkey_path)
                   1287:     ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
                   1288: 
                   1289:   ssh_set_blocking(sess, 0);
                   1290: 
                   1291:   switch (sk_ssh_connect(s))
                   1292:   {
                   1293:   case SSH_AGAIN:
                   1294:     break;
                   1295: 
                   1296:   case SSH_OK:
                   1297:     sk_ssh_connected(s);
                   1298:     break;
                   1299: 
                   1300:   case SSH_ERROR:
                   1301:     ERR2(ssh_get_error(sess));
                   1302:     break;
                   1303:   }
                   1304: 
                   1305:   return ssh_get_fd(sess);
                   1306: 
                   1307:  err:
                   1308:   return -1;
                   1309: }
                   1310: #endif
                   1311: 
                   1312: /**
                   1313:  * sk_open - open a socket
                   1314:  * @s: socket
                   1315:  *
                   1316:  * This function takes a socket resource created by sk_new() and
                   1317:  * initialized by the user and binds a corresponding network connection
                   1318:  * to it.
                   1319:  *
                   1320:  * Result: 0 for success, -1 for an error.
                   1321:  */
                   1322: int
                   1323: sk_open(sock *s)
                   1324: {
                   1325:   int af = AF_UNSPEC;
                   1326:   int fd = -1;
                   1327:   int do_bind = 0;
                   1328:   int bind_port = 0;
                   1329:   ip_addr bind_addr = IPA_NONE;
                   1330:   sockaddr sa;
                   1331: 
                   1332:   if (s->type <= SK_IP)
                   1333:   {
                   1334:     /*
                   1335:      * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
                   1336:      * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
                   1337:      * But the specifications have to be consistent.
                   1338:      */
                   1339: 
                   1340:     switch (s->subtype)
                   1341:     {
                   1342:     case 0:
                   1343:       ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
                   1344:             (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
                   1345:       af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
                   1346:       break;
                   1347: 
                   1348:     case SK_IPV4:
                   1349:       ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
                   1350:       ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
                   1351:       af = AF_INET;
                   1352:       break;
                   1353: 
                   1354:     case SK_IPV6:
                   1355:       ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
                   1356:       ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
                   1357:       af = AF_INET6;
                   1358:       break;
                   1359: 
                   1360:     default:
                   1361:       bug("Invalid subtype %d", s->subtype);
                   1362:     }
                   1363:   }
                   1364: 
                   1365:   switch (s->type)
                   1366:   {
                   1367:   case SK_TCP_ACTIVE:
                   1368:     s->ttx = "";                       /* Force s->ttx != s->tpos */
                   1369:     /* Fall thru */
                   1370:   case SK_TCP_PASSIVE:
                   1371:     fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
                   1372:     bind_port = s->sport;
                   1373:     bind_addr = s->saddr;
                   1374:     do_bind = bind_port || ipa_nonzero(bind_addr);
                   1375:     break;
                   1376: 
                   1377: #ifdef HAVE_LIBSSH
                   1378:   case SK_SSH_ACTIVE:
                   1379:     s->ttx = "";                       /* Force s->ttx != s->tpos */
                   1380:     fd = sk_open_ssh(s);
                   1381:     break;
                   1382: #endif
                   1383: 
                   1384:   case SK_UDP:
                   1385:     fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
                   1386:     bind_port = s->sport;
                   1387:     bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
                   1388:     do_bind = 1;
                   1389:     break;
                   1390: 
                   1391:   case SK_IP:
                   1392:     fd = socket(af, SOCK_RAW, s->dport);
                   1393:     bind_port = 0;
                   1394:     bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
                   1395:     do_bind = ipa_nonzero(bind_addr);
                   1396:     break;
                   1397: 
                   1398:   case SK_MAGIC:
                   1399:     af = 0;
                   1400:     fd = s->fd;
                   1401:     break;
                   1402: 
                   1403:   default:
                   1404:     bug("sk_open() called for invalid sock type %d", s->type);
                   1405:   }
                   1406: 
                   1407:   if (fd < 0)
                   1408:     ERR("socket");
                   1409: 
                   1410:   s->af = af;
                   1411:   s->fd = fd;
                   1412: 
                   1413:   if (sk_setup(s) < 0)
                   1414:     goto err;
                   1415: 
                   1416:   if (do_bind)
                   1417:   {
                   1418:     if (bind_port)
                   1419:     {
                   1420:       int y = 1;
                   1421: 
                   1422:       if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
                   1423:        ERR2("SO_REUSEADDR");
                   1424: 
                   1425: #ifdef CONFIG_NO_IFACE_BIND
                   1426:       /* Workaround missing ability to bind to an iface */
                   1427:       if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
                   1428:       {
                   1429:        if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
                   1430:          ERR2("SO_REUSEPORT");
                   1431:       }
                   1432: #endif
                   1433:     }
                   1434:     else
                   1435:       if (s->flags & SKF_HIGH_PORT)
                   1436:        if (sk_set_high_port(s) < 0)
                   1437:          log(L_WARN "Socket error: %s%#m", s->err);
                   1438: 
                   1439:     sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
                   1440:     if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
                   1441:       ERR2("bind");
                   1442:   }
                   1443: 
                   1444:   if (s->password)
                   1445:     if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
                   1446:       goto err;
                   1447: 
                   1448:   switch (s->type)
                   1449:   {
                   1450:   case SK_TCP_ACTIVE:
                   1451:     sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
                   1452:     if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
                   1453:       sk_tcp_connected(s);
                   1454:     else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
                   1455:             errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
                   1456:       ERR2("connect");
                   1457:     break;
                   1458: 
                   1459:   case SK_TCP_PASSIVE:
                   1460:     if (listen(fd, 8) < 0)
                   1461:       ERR2("listen");
                   1462:     break;
                   1463: 
                   1464:   case SK_SSH_ACTIVE:
                   1465:   case SK_MAGIC:
                   1466:     break;
                   1467: 
                   1468:   default:
                   1469:     sk_alloc_bufs(s);
                   1470:   }
                   1471: 
                   1472:   if (!(s->flags & SKF_THREAD))
                   1473:     sk_insert(s);
                   1474: 
                   1475:   return 0;
                   1476: 
                   1477: err:
                   1478:   close(fd);
                   1479:   s->fd = -1;
                   1480:   return -1;
                   1481: }
                   1482: 
                   1483: int
                   1484: sk_open_unix(sock *s, char *name)
                   1485: {
                   1486:   struct sockaddr_un sa;
                   1487:   int fd;
                   1488: 
                   1489:   /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
                   1490: 
                   1491:   fd = socket(AF_UNIX, SOCK_STREAM, 0);
                   1492:   if (fd < 0)
                   1493:     return -1;
                   1494: 
                   1495:   if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
                   1496:     return -1;
                   1497: 
                   1498:   /* Path length checked in test_old_bird() */
                   1499:   sa.sun_family = AF_UNIX;
                   1500:   strcpy(sa.sun_path, name);
                   1501: 
                   1502:   if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
                   1503:     return -1;
                   1504: 
                   1505:   if (listen(fd, 8) < 0)
                   1506:     return -1;
                   1507: 
                   1508:   s->fd = fd;
                   1509:   sk_insert(s);
                   1510:   return 0;
                   1511: }
                   1512: 
                   1513: 
/*
 * Sizes of the ancillary-data (cmsg) buffers declared in sk_recvmsg() and
 * sk_sendmsg(). Each one is the larger of the IPv4 and the IPv6 requirement,
 * so a single buffer fits either address family: the RX buffer has room for
 * packet info plus TTL, the TX buffer for packet info only.
 */
#define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
                         CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
#define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
                   1517: 
                   1518: static void
                   1519: sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
                   1520: {
                   1521:   if (sk_is_ipv4(s))
                   1522:     sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
                   1523:   else
                   1524:     sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
                   1525: }
                   1526: 
                   1527: static void
                   1528: sk_process_cmsgs(sock *s, struct msghdr *msg)
                   1529: {
                   1530:   struct cmsghdr *cm;
                   1531: 
                   1532:   s->laddr = IPA_NONE;
                   1533:   s->lifindex = 0;
                   1534:   s->rcv_ttl = -1;
                   1535: 
                   1536:   for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
                   1537:   {
                   1538:     if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
                   1539:     {
                   1540:       sk_process_cmsg4_pktinfo(s, cm);
                   1541:       sk_process_cmsg4_ttl(s, cm);
                   1542:     }
                   1543: 
                   1544:     if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
                   1545:     {
                   1546:       sk_process_cmsg6_pktinfo(s, cm);
                   1547:       sk_process_cmsg6_ttl(s, cm);
                   1548:     }
                   1549:   }
                   1550: }
                   1551: 
                   1552: 
/*
 * Send the content of the transmit buffer (s->tbuf up to s->tpos) as a single
 * message to s->daddr / s->dport using sendmsg(). Returns whatever sendmsg()
 * returns, i.e. a negative value with errno set on failure.
 */
static inline int
sk_sendmsg(sock *s)
{
  /* Payload: everything between the start of the buffer and s->tpos */
  struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
  byte cmsg_buf[CMSG_TX_SPACE];
  sockaddr dst;
  int flags = 0;

  sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);

  struct msghdr msg = {
    .msg_name = &dst.sa,
    .msg_namelen = SA_LEN(dst),
    .msg_iov = &iov,
    .msg_iovlen = 1
  };

#ifdef CONFIG_DONTROUTE_UNICAST
  /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
     cannot use it for other cases (e.g. when TTL security is used). */
  if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1))
    flags = MSG_DONTROUTE;
#endif

#ifdef CONFIG_USE_HDRINCL
  /* Two-element vector: a 20-byte header buffer followed by a copy of the
     payload descriptor; only used when the socket has SKF_HDRINCL set */
  byte hdr[20];
  struct iovec iov2[2] = { {hdr, 20}, iov };

  if (s->flags & SKF_HDRINCL)
  {
    sk_prepare_ip_header(s, hdr, iov.iov_len);
    msg.msg_iov = iov2;
    msg.msg_iovlen = 2;
  }
#endif

  /* Control messages are attached only for sockets with SKF_PKTINFO */
  if (s->flags & SKF_PKTINFO)
    sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));

  return sendmsg(s->fd, &msg, flags);
}
                   1594: 
                   1595: static inline int
                   1596: sk_recvmsg(sock *s)
                   1597: {
                   1598:   struct iovec iov = {s->rbuf, s->rbsize};
                   1599:   byte cmsg_buf[CMSG_RX_SPACE];
                   1600:   sockaddr src;
                   1601: 
                   1602:   struct msghdr msg = {
                   1603:     .msg_name = &src.sa,
                   1604:     .msg_namelen = sizeof(src), // XXXX ??
                   1605:     .msg_iov = &iov,
                   1606:     .msg_iovlen = 1,
                   1607:     .msg_control = cmsg_buf,
                   1608:     .msg_controllen = sizeof(cmsg_buf),
                   1609:     .msg_flags = 0
                   1610:   };
                   1611: 
                   1612:   int rv = recvmsg(s->fd, &msg, 0);
                   1613:   if (rv < 0)
                   1614:     return rv;
                   1615: 
                   1616:   //ifdef IPV4
                   1617:   //  if (cf_type == SK_IP)
                   1618:   //    rv = ipv4_skip_header(pbuf, rv);
                   1619:   //endif
                   1620: 
                   1621:   sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
                   1622:   sk_process_cmsgs(s, &msg);
                   1623: 
                   1624:   if (msg.msg_flags & MSG_TRUNC)
                   1625:     s->flags |= SKF_TRUNCATED;
                   1626:   else
                   1627:     s->flags &= ~SKF_TRUNCATED;
                   1628: 
                   1629:   return rv;
                   1630: }
                   1631: 
                   1632: 
                   1633: static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
                   1634: 
                   1635: static int
                   1636: sk_maybe_write(sock *s)
                   1637: {
                   1638:   int e;
                   1639: 
                   1640:   switch (s->type)
                   1641:   {
                   1642:   case SK_TCP:
                   1643:   case SK_MAGIC:
                   1644:   case SK_UNIX:
                   1645:     while (s->ttx != s->tpos)
                   1646:     {
                   1647:       e = write(s->fd, s->ttx, s->tpos - s->ttx);
                   1648: 
                   1649:       if (e < 0)
                   1650:       {
                   1651:        if (errno != EINTR && errno != EAGAIN)
                   1652:        {
                   1653:          reset_tx_buffer(s);
                   1654:          /* EPIPE is just a connection close notification during TX */
                   1655:          s->err_hook(s, (errno != EPIPE) ? errno : 0);
                   1656:          return -1;
                   1657:        }
                   1658:        return 0;
                   1659:       }
                   1660:       s->ttx += e;
                   1661:     }
                   1662:     reset_tx_buffer(s);
                   1663:     return 1;
                   1664: 
                   1665: #ifdef HAVE_LIBSSH
                   1666:   case SK_SSH:
                   1667:     while (s->ttx != s->tpos)
                   1668:     {
                   1669:       e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
                   1670: 
                   1671:       if (e < 0)
                   1672:       {
                   1673:        s->err = ssh_get_error(s->ssh->session);
                   1674:        s->err_hook(s, ssh_get_error_code(s->ssh->session));
                   1675: 
                   1676:        reset_tx_buffer(s);
                   1677:        /* EPIPE is just a connection close notification during TX */
                   1678:        s->err_hook(s, (errno != EPIPE) ? errno : 0);
                   1679:        return -1;
                   1680:       }
                   1681:       s->ttx += e;
                   1682:     }
                   1683:     reset_tx_buffer(s);
                   1684:     return 1;
                   1685: #endif
                   1686: 
                   1687:   case SK_UDP:
                   1688:   case SK_IP:
                   1689:     {
                   1690:       if (s->tbuf == s->tpos)
                   1691:        return 1;
                   1692: 
                   1693:       e = sk_sendmsg(s);
                   1694: 
                   1695:       if (e < 0)
                   1696:       {
                   1697:        if (errno != EINTR && errno != EAGAIN)
                   1698:        {
                   1699:          reset_tx_buffer(s);
                   1700:          s->err_hook(s, errno);
                   1701:          return -1;
                   1702:        }
                   1703: 
                   1704:        if (!s->tx_hook)
                   1705:          reset_tx_buffer(s);
                   1706:        return 0;
                   1707:       }
                   1708:       reset_tx_buffer(s);
                   1709:       return 1;
                   1710:     }
                   1711: 
                   1712:   default:
                   1713:     bug("sk_maybe_write: unknown socket type %d", s->type);
                   1714:   }
                   1715: }
                   1716: 
                   1717: int
                   1718: sk_rx_ready(sock *s)
                   1719: {
                   1720:   int rv;
                   1721:   struct pollfd pfd = { .fd = s->fd };
                   1722:   pfd.events |= POLLIN;
                   1723: 
                   1724:  redo:
                   1725:   rv = poll(&pfd, 1, 0);
                   1726: 
                   1727:   if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
                   1728:     goto redo;
                   1729: 
                   1730:   return rv;
                   1731: }
                   1732: 
                   1733: /**
                   1734:  * sk_send - send data to a socket
                   1735:  * @s: socket
                   1736:  * @len: number of bytes to send
                   1737:  *
                   1738:  * This function sends @len bytes of data prepared in the
                   1739:  * transmit buffer of the socket @s to the network connection.
                   1740:  * If the packet can be sent immediately, it does so and returns
                   1741:  * 1, else it queues the packet for later processing, returns 0
                   1742:  * and calls the @tx_hook of the socket when the tranmission
                   1743:  * takes place.
                   1744:  */
                   1745: int
                   1746: sk_send(sock *s, unsigned len)
                   1747: {
                   1748:   s->ttx = s->tbuf;
                   1749:   s->tpos = s->tbuf + len;
                   1750:   return sk_maybe_write(s);
                   1751: }
                   1752: 
                   1753: /**
                   1754:  * sk_send_to - send data to a specific destination
                   1755:  * @s: socket
                   1756:  * @len: number of bytes to send
                   1757:  * @addr: IP address to send the packet to
                   1758:  * @port: port to send the packet to
                   1759:  *
                   1760:  * This is a sk_send() replacement for connection-less packet sockets
                   1761:  * which allows destination of the packet to be chosen dynamically.
                   1762:  * Raw IP sockets should use 0 for @port.
                   1763:  */
                   1764: int
                   1765: sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
                   1766: {
                   1767:   s->daddr = addr;
                   1768:   if (port)
                   1769:     s->dport = port;
                   1770: 
                   1771:   s->ttx = s->tbuf;
                   1772:   s->tpos = s->tbuf + len;
                   1773:   return sk_maybe_write(s);
                   1774: }
                   1775: 
                   1776: /*
                   1777: int
                   1778: sk_send_full(sock *s, unsigned len, struct iface *ifa,
                   1779:             ip_addr saddr, ip_addr daddr, unsigned dport)
                   1780: {
                   1781:   s->iface = ifa;
                   1782:   s->saddr = saddr;
                   1783:   s->daddr = daddr;
                   1784:   s->dport = dport;
                   1785:   s->ttx = s->tbuf;
                   1786:   s->tpos = s->tbuf + len;
                   1787:   return sk_maybe_write(s);
                   1788: }
                   1789: */
                   1790: 
                   1791: static void
                   1792: call_rx_hook(sock *s, int size)
                   1793: {
                   1794:   if (s->rx_hook(s, size))
                   1795:   {
                   1796:     /* We need to be careful since the socket could have been deleted by the hook */
                   1797:     if (current_sock == s)
                   1798:       s->rpos = s->rbuf;
                   1799:   }
                   1800: }
                   1801: 
#ifdef HAVE_LIBSSH
/*
 * Read pending data from the SSH channel of @s into its receive buffer.
 *
 * Returns 1 when the caller should call again (the select was interrupted or
 * data was received and passed to the rx hook) and 0 otherwise. End of stream
 * is reported through err_hook with code 0, a read failure through err_hook
 * with the libssh error code and s->err set to the libssh error string.
 */
static int
sk_read_ssh(sock *s)
{
  /* NULL-terminated list of channels to wait on */
  ssh_channel rchans[2] = { s->ssh->channel, NULL };
  struct timeval timev = { 1, 0 };

  if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
    return 1; /* Try again */

  if (ssh_channel_is_eof(s->ssh->channel) != 0)
  {
    /* The remote side is closing the connection */
    s->err_hook(s, 0);
    return 0;
  }

  if (rchans[0] == NULL)
    return 0; /* No data is available on the socket */

  /* Append after the data already held in the buffer (rbuf up to rpos) */
  const uint used_bytes = s->rpos - s->rbuf;
  const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
  if (read_bytes > 0)
  {
    /* Received data */
    s->rpos += read_bytes;
    call_rx_hook(s, used_bytes + read_bytes);
    return 1;
  }
  else if (read_bytes == 0)
  {
    if (ssh_channel_is_eof(s->ssh->channel) != 0)
    {
       /* The remote side is closing the connection */
       s->err_hook(s, 0);
    }
  }
  else
  {
    /* Negative result: hand the libssh error over to the error hook */
    s->err = ssh_get_error(s->ssh->session);
    s->err_hook(s, ssh_get_error_code(s->ssh->session));
  }

  return 0; /* No data is available on the socket */
}
#endif
                   1848: 
                   1849:  /* sk_read() and sk_write() are called from BFD's event loop */
                   1850: 
                   1851: int
                   1852: sk_read(sock *s, int revents)
                   1853: {
                   1854:   switch (s->type)
                   1855:   {
                   1856:   case SK_TCP_PASSIVE:
                   1857:     return sk_passive_connected(s, SK_TCP);
                   1858: 
                   1859:   case SK_UNIX_PASSIVE:
                   1860:     return sk_passive_connected(s, SK_UNIX);
                   1861: 
                   1862:   case SK_TCP:
                   1863:   case SK_UNIX:
                   1864:     {
                   1865:       int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
                   1866: 
                   1867:       if (c < 0)
                   1868:       {
                   1869:        if (errno != EINTR && errno != EAGAIN)
                   1870:          s->err_hook(s, errno);
                   1871:        else if (errno == EAGAIN && !(revents & POLLIN))
                   1872:        {
                   1873:          log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
                   1874:          s->err_hook(s, 0);
                   1875:        }
                   1876:       }
                   1877:       else if (!c)
                   1878:        s->err_hook(s, 0);
                   1879:       else
                   1880:       {
                   1881:        s->rpos += c;
                   1882:        call_rx_hook(s, s->rpos - s->rbuf);
                   1883:        return 1;
                   1884:       }
                   1885:       return 0;
                   1886:     }
                   1887: 
                   1888: #ifdef HAVE_LIBSSH
                   1889:   case SK_SSH:
                   1890:     return sk_read_ssh(s);
                   1891: #endif
                   1892: 
                   1893:   case SK_MAGIC:
                   1894:     return s->rx_hook(s, 0);
                   1895: 
                   1896:   default:
                   1897:     {
                   1898:       int e = sk_recvmsg(s);
                   1899: 
                   1900:       if (e < 0)
                   1901:       {
                   1902:        if (errno != EINTR && errno != EAGAIN)
                   1903:          s->err_hook(s, errno);
                   1904:        return 0;
                   1905:       }
                   1906: 
                   1907:       s->rpos = s->rbuf + e;
                   1908:       s->rx_hook(s, e);
                   1909:       return 1;
                   1910:     }
                   1911:   }
                   1912: }
                   1913: 
                   1914: int
                   1915: sk_write(sock *s)
                   1916: {
                   1917:   switch (s->type)
                   1918:   {
                   1919:   case SK_TCP_ACTIVE:
                   1920:     {
                   1921:       sockaddr sa;
                   1922:       sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
                   1923: 
                   1924:       if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
                   1925:        sk_tcp_connected(s);
                   1926:       else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
                   1927:        s->err_hook(s, errno);
                   1928:       return 0;
                   1929:     }
                   1930: 
                   1931: #ifdef HAVE_LIBSSH
                   1932:   case SK_SSH_ACTIVE:
                   1933:     {
                   1934:       switch (sk_ssh_connect(s))
                   1935:       {
                   1936:        case SSH_OK:
                   1937:          sk_ssh_connected(s);
                   1938:          break;
                   1939: 
                   1940:        case SSH_AGAIN:
                   1941:          return 1;
                   1942: 
                   1943:        case SSH_ERROR:
                   1944:          s->err = ssh_get_error(s->ssh->session);
                   1945:          s->err_hook(s, ssh_get_error_code(s->ssh->session));
                   1946:          break;
                   1947:       }
                   1948:       return 0;
                   1949:     }
                   1950: #endif
                   1951: 
                   1952:   default:
                   1953:     if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
                   1954:     {
                   1955:       if (s->tx_hook)
                   1956:        s->tx_hook(s);
                   1957:       return 1;
                   1958:     }
                   1959:     return 0;
                   1960:   }
                   1961: }
                   1962: 
                   1963: int sk_is_ipv4(sock *s)
                   1964: { return s->af == AF_INET; }
                   1965: 
                   1966: int sk_is_ipv6(sock *s)
                   1967: { return s->af == AF_INET6; }
                   1968: 
                   1969: void
                   1970: sk_err(sock *s, int revents)
                   1971: {
                   1972:   int se = 0, sse = sizeof(se);
                   1973:   if ((s->type != SK_MAGIC) && (revents & POLLERR))
                   1974:     if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
                   1975:     {
                   1976:       log(L_ERR "IO: Socket error: SO_ERROR: %m");
                   1977:       se = 0;
                   1978:     }
                   1979: 
                   1980:   s->err_hook(s, se);
                   1981: }
                   1982: 
                   1983: void
                   1984: sk_dump_all(void)
                   1985: {
                   1986:   node *n;
                   1987:   sock *s;
                   1988: 
                   1989:   debug("Open sockets:\n");
                   1990:   WALK_LIST(n, sock_list)
                   1991:   {
                   1992:     s = SKIP_BACK(sock, n, n);
                   1993:     debug("%p ", s);
                   1994:     sk_dump(&s->r);
                   1995:   }
                   1996:   debug("\n");
                   1997: }
                   1998: 
                   1999: 
                   2000: /*
                   2001:  *     Internal event log and watchdog
                   2002:  */
                   2003: 
/* Number of entries in the circular event log */
#define EVENT_LOG_LENGTH 32

/* One record of the internal event log, filled by io_log_event() */
struct event_log_entry
{
  void *hook;                   /* Event hook address; NULL in a never-used slot */
  void *data;                   /* Event data address */
  btime timestamp;              /* Value of last_time when the event was logged */
  btime duration;               /* 0 until io_update_time() closes the entry */
};

/* The circular log itself */
static struct event_log_entry event_log[EVENT_LOG_LENGTH];
/* Entry whose duration is still to be filled in, or NULL */
static struct event_log_entry *event_open;
/*
 * event_log_pos: index of the slot the next event is written to
 * event_log_num: count of events logged so far
 * watchdog_active: not used in this part of the file -- presumably set while
 *                  the watchdog alarm is armed; confirm against its users
 */
static int event_log_pos, event_log_num, watchdog_active;
/* Monotonic clock reading taken by the last io_update_time() call */
static btime last_time;
/* Value of last_time recorded by watchdog_start1() */
static btime loop_time;
                   2019: 
                   2020: static void
                   2021: io_update_time(void)
                   2022: {
                   2023:   struct timespec ts;
                   2024:   int rv;
                   2025: 
                   2026:   /*
                   2027:    * This is third time-tracking procedure (after update_times() above and
                   2028:    * times_update() in BFD), dedicated to internal event log and latency
                   2029:    * tracking. Hopefully, we consolidate these sometimes.
                   2030:    */
                   2031: 
                   2032:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
                   2033:   if (rv < 0)
                   2034:     die("clock_gettime: %m");
                   2035: 
                   2036:   last_time = ts.tv_sec S + ts.tv_nsec NS;
                   2037: 
                   2038:   if (event_open)
                   2039:   {
                   2040:     event_open->duration = last_time - event_open->timestamp;
                   2041: 
                   2042:     if (event_open->duration > config->latency_limit)
                   2043:       log(L_WARN "Event 0x%p 0x%p took %d ms",
                   2044:          event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
                   2045: 
                   2046:     event_open = NULL;
                   2047:   }
                   2048: }
                   2049: 
                   2050: /**
                   2051:  * io_log_event - mark approaching event into event log
                   2052:  * @hook: event hook address
                   2053:  * @data: event data address
                   2054:  *
                   2055:  * Store info (hook, data, timestamp) about the following internal event into
                   2056:  * a circular event log (@event_log). When latency tracking is enabled, the log
                   2057:  * entry is kept open (in @event_open) so the duration can be filled later.
                   2058:  */
                   2059: void
                   2060: io_log_event(void *hook, void *data)
                   2061: {
                   2062:   if (config->latency_debug)
                   2063:     io_update_time();
                   2064: 
                   2065:   struct event_log_entry *en = event_log + event_log_pos;
                   2066: 
                   2067:   en->hook = hook;
                   2068:   en->data = data;
                   2069:   en->timestamp = last_time;
                   2070:   en->duration = 0;
                   2071: 
                   2072:   event_log_num++;
                   2073:   event_log_pos++;
                   2074:   event_log_pos %= EVENT_LOG_LENGTH;
                   2075: 
                   2076:   event_open = config->latency_debug ? en : NULL;
                   2077: }
                   2078: 
                   2079: static inline void
                   2080: io_close_event(void)
                   2081: {
                   2082:   if (event_open)
                   2083:     io_update_time();
                   2084: }
                   2085: 
                   2086: void
                   2087: io_log_dump(void)
                   2088: {
                   2089:   int i;
                   2090: 
                   2091:   log(L_DEBUG "Event log:");
                   2092:   for (i = 0; i < EVENT_LOG_LENGTH; i++)
                   2093:   {
                   2094:     struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
                   2095:     if (en->hook)
                   2096:       log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
                   2097:          (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
                   2098:   }
                   2099: }
                   2100: 
                   2101: void
                   2102: watchdog_sigalrm(int sig UNUSED)
                   2103: {
                   2104:   /* Update last_time and duration, but skip latency check */
                   2105:   config->latency_limit = 0xffffffff;
                   2106:   io_update_time();
                   2107: 
                   2108:   /* We want core dump */
                   2109:   abort();
                   2110: }
                   2111: 
                   2112: static inline void
                   2113: watchdog_start1(void)
                   2114: {
                   2115:   io_update_time();
                   2116: 
                   2117:   loop_time = last_time;
                   2118: }
                   2119: 
                   2120: static inline void
                   2121: watchdog_start(void)
                   2122: {
                   2123:   io_update_time();
                   2124: 
                   2125:   loop_time = last_time;
                   2126:   event_log_num = 0;
                   2127: 
                   2128:   if (config->watchdog_timeout)
                   2129:   {
                   2130:     alarm(config->watchdog_timeout);
                   2131:     watchdog_active = 1;
                   2132:   }
                   2133: }
                   2134: 
                   2135: static inline void
                   2136: watchdog_stop(void)
                   2137: {
                   2138:   io_update_time();
                   2139: 
                   2140:   if (watchdog_active)
                   2141:   {
                   2142:     alarm(0);
                   2143:     watchdog_active = 0;
                   2144:   }
                   2145: 
                   2146:   btime duration = last_time - loop_time;
                   2147:   if (duration > config->watchdog_warning)
                   2148:     log(L_WARN "I/O loop cycle took %d ms for %d events",
                   2149:        (int) (duration TO_MS), event_log_num);
                   2150: }
                   2151: 
                   2152: 
                   2153: /*
                   2154:  *     Main I/O Loop
                   2155:  */
                   2156: 
                   2157: void
                   2158: io_init(void)
                   2159: {
                   2160:   init_list(&sock_list);
                   2161:   init_list(&global_event_list);
                   2162:   krt_io_init();
                   2163:   // XXX init_times();
                   2164:   // XXX update_times();
                   2165:   boot_time = current_time();
                   2166: 
                   2167:   u64 now = (u64) current_real_time();
                   2168:   srandom((uint) (now ^ (now >> 32)));
                   2169: }
                   2170: 
                   2171: static int short_loops = 0;
                   2172: #define SHORT_LOOP_MAX 10
                   2173: 
                   2174: void
                   2175: io_loop(void)
                   2176: {
                   2177:   int poll_tout, timeout;
                   2178:   int nfds, events, pout;
                   2179:   timer *t;
                   2180:   sock *s;
                   2181:   node *n;
                   2182:   int fdmax = 256;
                   2183:   struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
                   2184: 
                   2185:   watchdog_start1();
                   2186:   for(;;)
                   2187:     {
                   2188:       times_update(&main_timeloop);
                   2189:       events = ev_run_list(&global_event_list);
                   2190:       timers_fire(&main_timeloop);
                   2191:       io_close_event();
                   2192: 
                   2193:       // FIXME
                   2194:       poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
                   2195:       if (t = timers_first(&main_timeloop))
                   2196:       {
                   2197:        times_update(&main_timeloop);
                   2198:        timeout = (tm_remains(t) TO_MS) + 1;
                   2199:        poll_tout = MIN(poll_tout, timeout);
                   2200:       }
                   2201: 
                   2202:       nfds = 0;
                   2203:       WALK_LIST(n, sock_list)
                   2204:        {
                   2205:          pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
                   2206:          s = SKIP_BACK(sock, n, n);
                   2207:          if (s->rx_hook)
                   2208:            {
                   2209:              pfd[nfds].fd = s->fd;
                   2210:              pfd[nfds].events |= POLLIN;
                   2211:            }
                   2212:          if (s->tx_hook && s->ttx != s->tpos)
                   2213:            {
                   2214:              pfd[nfds].fd = s->fd;
                   2215:              pfd[nfds].events |= POLLOUT;
                   2216:            }
                   2217:          if (pfd[nfds].fd != -1)
                   2218:            {
                   2219:              s->index = nfds;
                   2220:              nfds++;
                   2221:            }
                   2222:          else
                   2223:            s->index = -1;
                   2224: 
                   2225:          if (nfds >= fdmax)
                   2226:            {
                   2227:              fdmax *= 2;
                   2228:              pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
                   2229:            }
                   2230:        }
                   2231: 
                   2232:       /*
                   2233:        * Yes, this is racy. But even if the signal comes before this test
                   2234:        * and entering poll(), it gets caught on the next timer tick.
                   2235:        */
                   2236: 
                   2237:       if (async_config_flag)
                   2238:        {
                   2239:          io_log_event(async_config, NULL);
                   2240:          async_config();
                   2241:          async_config_flag = 0;
                   2242:          continue;
                   2243:        }
                   2244:       if (async_dump_flag)
                   2245:        {
                   2246:          io_log_event(async_dump, NULL);
                   2247:          async_dump();
                   2248:          async_dump_flag = 0;
                   2249:          continue;
                   2250:        }
                   2251:       if (async_shutdown_flag)
                   2252:        {
                   2253:          io_log_event(async_shutdown, NULL);
                   2254:          async_shutdown();
                   2255:          async_shutdown_flag = 0;
                   2256:          continue;
                   2257:        }
                   2258: 
                   2259:       /* And finally enter poll() to find active sockets */
                   2260:       watchdog_stop();
                   2261:       pout = poll(pfd, nfds, poll_tout);
                   2262:       watchdog_start();
                   2263: 
                   2264:       if (pout < 0)
                   2265:        {
                   2266:          if (errno == EINTR || errno == EAGAIN)
                   2267:            continue;
                   2268:          die("poll: %m");
                   2269:        }
                   2270:       if (pout)
                   2271:        {
                   2272:          times_update(&main_timeloop);
                   2273: 
                   2274:          /* guaranteed to be non-empty */
                   2275:          current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
                   2276: 
                   2277:          while (current_sock)
                   2278:            {
                   2279:              sock *s = current_sock;
                   2280:              if (s->index == -1)
                   2281:                {
                   2282:                  current_sock = sk_next(s);
                   2283:                  goto next;
                   2284:                }
                   2285: 
                   2286:              int e;
                   2287:              int steps;
                   2288: 
                   2289:              steps = MAX_STEPS;
                   2290:              if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
                   2291:                do
                   2292:                  {
                   2293:                    steps--;
                   2294:                    io_log_event(s->rx_hook, s->data);
                   2295:                    e = sk_read(s, pfd[s->index].revents);
                   2296:                    if (s != current_sock)
                   2297:                      goto next;
                   2298:                  }
                   2299:                while (e && s->rx_hook && steps);
                   2300: 
                   2301:              steps = MAX_STEPS;
                   2302:              if (pfd[s->index].revents & POLLOUT)
                   2303:                do
                   2304:                  {
                   2305:                    steps--;
                   2306:                    io_log_event(s->tx_hook, s->data);
                   2307:                    e = sk_write(s);
                   2308:                    if (s != current_sock)
                   2309:                      goto next;
                   2310:                  }
                   2311:                while (e && steps);
                   2312: 
                   2313:              current_sock = sk_next(s);
                   2314:            next: ;
                   2315:            }
                   2316: 
                   2317:          short_loops++;
                   2318:          if (events && (short_loops < SHORT_LOOP_MAX))
                   2319:            continue;
                   2320:          short_loops = 0;
                   2321: 
                   2322:          int count = 0;
                   2323:          current_sock = stored_sock;
                   2324:          if (current_sock == NULL)
                   2325:            current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
                   2326: 
                   2327:          while (current_sock && count < MAX_RX_STEPS)
                   2328:            {
                   2329:              sock *s = current_sock;
                   2330:              if (s->index == -1)
                   2331:                {
                   2332:                  current_sock = sk_next(s);
                   2333:                  goto next2;
                   2334:                }
                   2335: 
                   2336:              if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
                   2337:                {
                   2338:                  count++;
                   2339:                  io_log_event(s->rx_hook, s->data);
                   2340:                  sk_read(s, pfd[s->index].revents);
                   2341:                  if (s != current_sock)
                   2342:                    goto next2;
                   2343:                }
                   2344: 
                   2345:              if (pfd[s->index].revents & (POLLHUP | POLLERR))
                   2346:                {
                   2347:                  sk_err(s, pfd[s->index].revents);
                   2348:                  if (s != current_sock)
                   2349:                    goto next2;
                   2350:                }
                   2351: 
                   2352:              current_sock = sk_next(s);
                   2353:            next2: ;
                   2354:            }
                   2355: 
                   2356: 
                   2357:          stored_sock = current_sock;
                   2358:        }
                   2359:     }
                   2360: }
                   2361: 
                   2362: void
                   2363: test_old_bird(char *path)
                   2364: {
                   2365:   int fd;
                   2366:   struct sockaddr_un sa;
                   2367: 
                   2368:   fd = socket(AF_UNIX, SOCK_STREAM, 0);
                   2369:   if (fd < 0)
                   2370:     die("Cannot create socket: %m");
                   2371:   if (strlen(path) >= sizeof(sa.sun_path))
                   2372:     die("Socket path too long");
                   2373:   bzero(&sa, sizeof(sa));
                   2374:   sa.sun_family = AF_UNIX;
                   2375:   strcpy(sa.sun_path, path);
                   2376:   if (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0)
                   2377:     die("I found another BIRD running.");
                   2378:   close(fd);
                   2379: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>