File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird2 / sysdep / unix / io.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Oct 21 16:03:56 2019 UTC (5 years, 5 months ago) by misho
Branches: bird2, MAIN
CVS tags: v2_0_7p0, HEAD
bird2 ver 2.0.7

    1: /*
    2:  *	BIRD Internet Routing Daemon -- Unix I/O
    3:  *
    4:  *	(c) 1998--2004 Martin Mares <mj@ucw.cz>
    5:  *      (c) 2004       Ondrej Filip <feela@network.cz>
    6:  *
    7:  *	Can be freely distributed and used under the terms of the GNU GPL.
    8:  */
    9: 
   10: /* Unfortunately, some glibc versions hide parts of RFC 3542 API
   11:    if _GNU_SOURCE is not defined. */
   12: #ifndef _GNU_SOURCE
   13: #define _GNU_SOURCE
   14: #endif
   15: 
   16: #include <stdio.h>
   17: #include <stdlib.h>
   18: #include <time.h>
   19: #include <sys/time.h>
   20: #include <sys/types.h>
   21: #include <sys/socket.h>
   22: #include <sys/uio.h>
   23: #include <sys/un.h>
   24: #include <poll.h>
   25: #include <unistd.h>
   26: #include <fcntl.h>
   27: #include <errno.h>
   28: #include <net/if.h>
   29: #include <netinet/in.h>
   30: #include <netinet/tcp.h>
   31: #include <netinet/udp.h>
   32: #include <netinet/icmp6.h>
   33: 
   34: #include "nest/bird.h"
   35: #include "lib/lists.h"
   36: #include "lib/resource.h"
   37: #include "lib/socket.h"
   38: #include "lib/event.h"
   39: #include "lib/timer.h"
   40: #include "lib/string.h"
   41: #include "nest/iface.h"
   42: #include "conf/conf.h"
   43: 
   44: #include "sysdep/unix/unix.h"
   45: #include CONFIG_INCLUDE_SYSIO_H
   46: 
   47: /* Maximum number of calls of tx handler for one socket in one
   48:  * poll iteration. Should be small enough to not monopolize CPU by
   49:  * one protocol instance.
   50:  */
   51: #define MAX_STEPS 4
   52: 
   53: /* Maximum number of calls of rx handler for all sockets in one poll
   54:    iteration. RX callbacks are often much more costly so we limit
   55:    this to get small latencies */
   56: #define MAX_RX_STEPS 4
   57: 
   58: 
   59: /*
   60:  *	Tracked Files
   61:  */
   62: 
/* Tracked file: a stdio stream wrapped in a resource header */
struct rfile {
  resource r;	/* Resource header; rf_free()/rf_dump() cast it back to struct rfile */
  FILE *f;	/* The stdio stream obtained from fopen() in rf_open() */
};
   67: 
   68: static void
   69: rf_free(resource *r)
   70: {
   71:   struct rfile *a = (struct rfile *) r;
   72: 
   73:   fclose(a->f);
   74: }
   75: 
   76: static void
   77: rf_dump(resource *r)
   78: {
   79:   struct rfile *a = (struct rfile *) r;
   80: 
   81:   debug("(FILE *%p)\n", a->f);
   82: }
   83: 
/*
 * Resource class descriptor for tracked files. The initializer is positional;
 * NOTE(review): the meaning of the two trailing NULL slots is defined by
 * struct resclass, which is declared outside this file section.
 */
static struct resclass rf_class = {
  "FILE",			/* Class name */
  sizeof(struct rfile),		/* Size of one instance */
  rf_free,			/* Closes the stream */
  rf_dump,			/* Prints the stream pointer */
  NULL,
  NULL
};
   92: 
   93: struct rfile *
   94: rf_open(pool *p, char *name, char *mode)
   95: {
   96:   FILE *f = fopen(name, mode);
   97: 
   98:   if (!f)
   99:     return NULL;
  100: 
  101:   struct rfile *r = ralloc(p, &rf_class);
  102:   r->f = f;
  103:   return r;
  104: }
  105: 
  106: void *
  107: rf_file(struct rfile *f)
  108: {
  109:   return f->f;
  110: }
  111: 
  112: int
  113: rf_fileno(struct rfile *f)
  114: {
  115:   return fileno(f->f);
  116: }
  117: 
  118: 
  119: /*
  120:  *	Time clock
  121:  */
  122: 
  123: btime boot_time;
  124: 
  125: void
  126: times_init(struct timeloop *loop)
  127: {
  128:   struct timespec ts;
  129:   int rv;
  130: 
  131:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
  132:   if (rv < 0)
  133:     die("Monotonic clock is missing");
  134: 
  135:   if ((ts.tv_sec < 0) || (((u64) ts.tv_sec) > ((u64) 1 << 40)))
  136:     log(L_WARN "Monotonic clock is crazy");
  137: 
  138:   loop->last_time = ts.tv_sec S + ts.tv_nsec NS;
  139:   loop->real_time = 0;
  140: }
  141: 
  142: void
  143: times_update(struct timeloop *loop)
  144: {
  145:   struct timespec ts;
  146:   int rv;
  147: 
  148:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
  149:   if (rv < 0)
  150:     die("clock_gettime: %m");
  151: 
  152:   btime new_time = ts.tv_sec S + ts.tv_nsec NS;
  153: 
  154:   if (new_time < loop->last_time)
  155:     log(L_ERR "Monotonic clock is broken");
  156: 
  157:   loop->last_time = new_time;
  158:   loop->real_time = 0;
  159: }
  160: 
  161: void
  162: times_update_real_time(struct timeloop *loop)
  163: {
  164:   struct timespec ts;
  165:   int rv;
  166: 
  167:   rv = clock_gettime(CLOCK_REALTIME, &ts);
  168:   if (rv < 0)
  169:     die("clock_gettime: %m");
  170: 
  171:   loop->real_time = ts.tv_sec S + ts.tv_nsec NS;
  172: }
  173: 
  174: 
  175: /**
  176:  * DOC: Sockets
  177:  *
  178:  * Socket resources represent network connections. Their data structure (&socket)
  179:  * contains a lot of fields defining the exact type of the socket, the local and
  180:  * remote addresses and ports, pointers to socket buffers and finally pointers to
  181:  * hook functions to be called when new data have arrived to the receive buffer
  182:  * (@rx_hook), when the contents of the transmit buffer have been transmitted
  183:  * (@tx_hook) and when an error or connection close occurs (@err_hook).
  184:  *
  185:  * Freeing of sockets from inside socket hooks is perfectly safe.
  186:  */
  187: 
  188: #ifndef SOL_IP
  189: #define SOL_IP IPPROTO_IP
  190: #endif
  191: 
  192: #ifndef SOL_IPV6
  193: #define SOL_IPV6 IPPROTO_IPV6
  194: #endif
  195: 
  196: #ifndef SOL_ICMPV6
  197: #define SOL_ICMPV6 IPPROTO_ICMPV6
  198: #endif
  199: 
  200: 
  201: /*
  202:  *	Sockaddr helper functions
  203:  */
  204: 
  205: static inline int UNUSED sockaddr_length(int af)
  206: { return (af == AF_INET) ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); }
  207: 
  208: static inline void
  209: sockaddr_fill4(struct sockaddr_in *sa, ip_addr a, uint port)
  210: {
  211:   memset(sa, 0, sizeof(struct sockaddr_in));
  212: #ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
  213:   sa->sin_len = sizeof(struct sockaddr_in);
  214: #endif
  215:   sa->sin_family = AF_INET;
  216:   sa->sin_port = htons(port);
  217:   sa->sin_addr = ipa_to_in4(a);
  218: }
  219: 
  220: static inline void
  221: sockaddr_fill6(struct sockaddr_in6 *sa, ip_addr a, struct iface *ifa, uint port)
  222: {
  223:   memset(sa, 0, sizeof(struct sockaddr_in6));
  224: #ifdef SIN6_LEN
  225:   sa->sin6_len = sizeof(struct sockaddr_in6);
  226: #endif
  227:   sa->sin6_family = AF_INET6;
  228:   sa->sin6_port = htons(port);
  229:   sa->sin6_flowinfo = 0;
  230:   sa->sin6_addr = ipa_to_in6(a);
  231: 
  232:   if (ifa && ipa_is_link_local(a))
  233:     sa->sin6_scope_id = ifa->index;
  234: }
  235: 
  236: void
  237: sockaddr_fill(sockaddr *sa, int af, ip_addr a, struct iface *ifa, uint port)
  238: {
  239:   if (af == AF_INET)
  240:     sockaddr_fill4((struct sockaddr_in *) sa, a, port);
  241:   else if (af == AF_INET6)
  242:     sockaddr_fill6((struct sockaddr_in6 *) sa, a, ifa, port);
  243:   else
  244:     bug("Unknown AF");
  245: }
  246: 
  247: static inline void
  248: sockaddr_read4(struct sockaddr_in *sa, ip_addr *a, uint *port)
  249: {
  250:   *port = ntohs(sa->sin_port);
  251:   *a = ipa_from_in4(sa->sin_addr);
  252: }
  253: 
  254: static inline void
  255: sockaddr_read6(struct sockaddr_in6 *sa, ip_addr *a, struct iface **ifa, uint *port)
  256: {
  257:   *port = ntohs(sa->sin6_port);
  258:   *a = ipa_from_in6(sa->sin6_addr);
  259: 
  260:   if (ifa && ipa_is_link_local(*a))
  261:     *ifa = if_find_by_index(sa->sin6_scope_id);
  262: }
  263: 
  264: int
  265: sockaddr_read(sockaddr *sa, int af, ip_addr *a, struct iface **ifa, uint *port)
  266: {
  267:   if (sa->sa.sa_family != af)
  268:     goto fail;
  269: 
  270:   if (af == AF_INET)
  271:     sockaddr_read4((struct sockaddr_in *) sa, a, port);
  272:   else if (af == AF_INET6)
  273:     sockaddr_read6((struct sockaddr_in6 *) sa, a, ifa, port);
  274:   else
  275:     goto fail;
  276: 
  277:   return 0;
  278: 
  279:  fail:
  280:   *a = IPA_NONE;
  281:   *port = 0;
  282:   return -1;
  283: }
  284: 
  285: 
  286: /*
  287:  *	IPv6 multicast syscalls
  288:  */
  289: 
  290: /* Fortunately standardized in RFC 3493 */
  291: 
  292: #define INIT_MREQ6(maddr,ifa) \
  293:   { .ipv6mr_multiaddr = ipa_to_in6(maddr), .ipv6mr_interface = ifa->index }
  294: 
  295: static inline int
  296: sk_setup_multicast6(sock *s)
  297: {
  298:   int index = s->iface->index;
  299:   int ttl = s->ttl;
  300:   int n = 0;
  301: 
  302:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0)
  303:     ERR("IPV6_MULTICAST_IF");
  304: 
  305:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &ttl, sizeof(ttl)) < 0)
  306:     ERR("IPV6_MULTICAST_HOPS");
  307: 
  308:   if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_LOOP, &n, sizeof(n)) < 0)
  309:     ERR("IPV6_MULTICAST_LOOP");
  310: 
  311:   return 0;
  312: }
  313: 
  314: static inline int
  315: sk_join_group6(sock *s, ip_addr maddr)
  316: {
  317:   struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
  318: 
  319:   if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mr, sizeof(mr)) < 0)
  320:     ERR("IPV6_JOIN_GROUP");
  321: 
  322:   return 0;
  323: }
  324: 
  325: static inline int
  326: sk_leave_group6(sock *s, ip_addr maddr)
  327: {
  328:   struct ipv6_mreq mr = INIT_MREQ6(maddr, s->iface);
  329: 
  330:   if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mr, sizeof(mr)) < 0)
  331:     ERR("IPV6_LEAVE_GROUP");
  332: 
  333:   return 0;
  334: }
  335: 
  336: 
  337: /*
  338:  *	IPv6 packet control messages
  339:  */
  340: 
  341: /* Also standardized, in RFC 3542 */
  342: 
  343: /*
  344:  * RFC 2292 uses IPV6_PKTINFO for both the socket option and the cmsg
  345:  * type, RFC 3542 changed the socket option to IPV6_RECVPKTINFO. If we
  346:  * don't have IPV6_RECVPKTINFO we suppose the OS implements the older
  347:  * RFC and we use IPV6_PKTINFO.
  348:  */
  349: #ifndef IPV6_RECVPKTINFO
  350: #define IPV6_RECVPKTINFO IPV6_PKTINFO
  351: #endif
  352: /*
  353:  * Same goes for IPV6_HOPLIMIT -> IPV6_RECVHOPLIMIT.
  354:  */
  355: #ifndef IPV6_RECVHOPLIMIT
  356: #define IPV6_RECVHOPLIMIT IPV6_HOPLIMIT
  357: #endif
  358: 
  359: 
  360: #define CMSG6_SPACE_PKTINFO CMSG_SPACE(sizeof(struct in6_pktinfo))
  361: #define CMSG6_SPACE_TTL CMSG_SPACE(sizeof(int))
  362: 
  363: static inline int
  364: sk_request_cmsg6_pktinfo(sock *s)
  365: {
  366:   int y = 1;
  367: 
  368:   if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &y, sizeof(y)) < 0)
  369:     ERR("IPV6_RECVPKTINFO");
  370: 
  371:   return 0;
  372: }
  373: 
  374: static inline int
  375: sk_request_cmsg6_ttl(sock *s)
  376: {
  377:   int y = 1;
  378: 
  379:   if (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &y, sizeof(y)) < 0)
  380:     ERR("IPV6_RECVHOPLIMIT");
  381: 
  382:   return 0;
  383: }
  384: 
  385: static inline void
  386: sk_process_cmsg6_pktinfo(sock *s, struct cmsghdr *cm)
  387: {
  388:   if (cm->cmsg_type == IPV6_PKTINFO)
  389:   {
  390:     struct in6_pktinfo *pi = (struct in6_pktinfo *) CMSG_DATA(cm);
  391:     s->laddr = ipa_from_in6(pi->ipi6_addr);
  392:     s->lifindex = pi->ipi6_ifindex;
  393:   }
  394: }
  395: 
  396: static inline void
  397: sk_process_cmsg6_ttl(sock *s, struct cmsghdr *cm)
  398: {
  399:   if (cm->cmsg_type == IPV6_HOPLIMIT)
  400:     s->rcv_ttl = * (int *) CMSG_DATA(cm);
  401: }
  402: 
  403: static inline void
  404: sk_prepare_cmsgs6(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
  405: {
  406:   struct cmsghdr *cm;
  407:   struct in6_pktinfo *pi;
  408:   int controllen = 0;
  409: 
  410:   msg->msg_control = cbuf;
  411:   msg->msg_controllen = cbuflen;
  412: 
  413:   cm = CMSG_FIRSTHDR(msg);
  414:   cm->cmsg_level = SOL_IPV6;
  415:   cm->cmsg_type = IPV6_PKTINFO;
  416:   cm->cmsg_len = CMSG_LEN(sizeof(*pi));
  417:   controllen += CMSG_SPACE(sizeof(*pi));
  418: 
  419:   pi = (struct in6_pktinfo *) CMSG_DATA(cm);
  420:   pi->ipi6_ifindex = s->iface ? s->iface->index : 0;
  421:   pi->ipi6_addr = ipa_to_in6(s->saddr);
  422: 
  423:   msg->msg_controllen = controllen;
  424: }
  425: 
  426: 
  427: /*
  428:  *	Miscellaneous socket syscalls
  429:  */
  430: 
  431: static inline int
  432: sk_set_ttl4(sock *s, int ttl)
  433: {
  434:   if (setsockopt(s->fd, SOL_IP, IP_TTL, &ttl, sizeof(ttl)) < 0)
  435:     ERR("IP_TTL");
  436: 
  437:   return 0;
  438: }
  439: 
  440: static inline int
  441: sk_set_ttl6(sock *s, int ttl)
  442: {
  443:   if (setsockopt(s->fd, SOL_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) < 0)
  444:     ERR("IPV6_UNICAST_HOPS");
  445: 
  446:   return 0;
  447: }
  448: 
  449: static inline int
  450: sk_set_tos4(sock *s, int tos)
  451: {
  452:   if (setsockopt(s->fd, SOL_IP, IP_TOS, &tos, sizeof(tos)) < 0)
  453:     ERR("IP_TOS");
  454: 
  455:   return 0;
  456: }
  457: 
  458: static inline int
  459: sk_set_tos6(sock *s, int tos)
  460: {
  461:   if (setsockopt(s->fd, SOL_IPV6, IPV6_TCLASS, &tos, sizeof(tos)) < 0)
  462:     ERR("IPV6_TCLASS");
  463: 
  464:   return 0;
  465: }
  466: 
  467: static inline int
  468: sk_set_high_port(sock *s UNUSED)
  469: {
  470:   /* Port range setting is optional, ignore it if not supported */
  471: 
  472: #ifdef IP_PORTRANGE
  473:   if (sk_is_ipv4(s))
  474:   {
  475:     int range = IP_PORTRANGE_HIGH;
  476:     if (setsockopt(s->fd, SOL_IP, IP_PORTRANGE, &range, sizeof(range)) < 0)
  477:       ERR("IP_PORTRANGE");
  478:   }
  479: #endif
  480: 
  481: #ifdef IPV6_PORTRANGE
  482:   if (sk_is_ipv6(s))
  483:   {
  484:     int range = IPV6_PORTRANGE_HIGH;
  485:     if (setsockopt(s->fd, SOL_IPV6, IPV6_PORTRANGE, &range, sizeof(range)) < 0)
  486:       ERR("IPV6_PORTRANGE");
  487:   }
  488: #endif
  489: 
  490:   return 0;
  491: }
  492: 
  493: static inline byte *
  494: sk_skip_ip_header(byte *pkt, int *len)
  495: {
  496:   if ((*len < 20) || ((*pkt & 0xf0) != 0x40))
  497:     return NULL;
  498: 
  499:   int hlen = (*pkt & 0x0f) * 4;
  500:   if ((hlen < 20) || (hlen > *len))
  501:     return NULL;
  502: 
  503:   *len -= hlen;
  504:   return pkt + hlen;
  505: }
  506: 
  507: byte *
  508: sk_rx_buffer(sock *s, int *len)
  509: {
  510:   if (sk_is_ipv4(s) && (s->type == SK_IP))
  511:     return sk_skip_ip_header(s->rbuf, len);
  512:   else
  513:     return s->rbuf;
  514: }
  515: 
  516: 
  517: /*
  518:  *	Public socket functions
  519:  */
  520: 
  521: /**
  522:  * sk_setup_multicast - enable multicast for given socket
  523:  * @s: socket
  524:  *
  525:  * Prepare transmission of multicast packets for given datagram socket.
  526:  * The socket must have defined @iface.
  527:  *
  528:  * Result: 0 for success, -1 for an error.
  529:  */
  530: 
  531: int
  532: sk_setup_multicast(sock *s)
  533: {
  534:   ASSERT(s->iface);
  535: 
  536:   if (sk_is_ipv4(s))
  537:     return sk_setup_multicast4(s);
  538:   else
  539:     return sk_setup_multicast6(s);
  540: }
  541: 
  542: /**
  543:  * sk_join_group - join multicast group for given socket
  544:  * @s: socket
  545:  * @maddr: multicast address
  546:  *
  547:  * Join multicast group for given datagram socket and associated interface.
  548:  * The socket must have defined @iface.
  549:  *
  550:  * Result: 0 for success, -1 for an error.
  551:  */
  552: 
  553: int
  554: sk_join_group(sock *s, ip_addr maddr)
  555: {
  556:   if (sk_is_ipv4(s))
  557:     return sk_join_group4(s, maddr);
  558:   else
  559:     return sk_join_group6(s, maddr);
  560: }
  561: 
  562: /**
  563:  * sk_leave_group - leave multicast group for given socket
  564:  * @s: socket
  565:  * @maddr: multicast address
  566:  *
  567:  * Leave multicast group for given datagram socket and associated interface.
  568:  * The socket must have defined @iface.
  569:  *
  570:  * Result: 0 for success, -1 for an error.
  571:  */
  572: 
  573: int
  574: sk_leave_group(sock *s, ip_addr maddr)
  575: {
  576:   if (sk_is_ipv4(s))
  577:     return sk_leave_group4(s, maddr);
  578:   else
  579:     return sk_leave_group6(s, maddr);
  580: }
  581: 
  582: /**
  583:  * sk_setup_broadcast - enable broadcast for given socket
  584:  * @s: socket
  585:  *
  586:  * Allow reception and transmission of broadcast packets for given datagram
  587:  * socket. The socket must have defined @iface. For transmission, packets should
  588:  * be sent to @brd address of @iface.
  589:  *
  590:  * Result: 0 for success, -1 for an error.
  591:  */
  592: 
  593: int
  594: sk_setup_broadcast(sock *s)
  595: {
  596:   int y = 1;
  597: 
  598:   if (setsockopt(s->fd, SOL_SOCKET, SO_BROADCAST, &y, sizeof(y)) < 0)
  599:     ERR("SO_BROADCAST");
  600: 
  601:   return 0;
  602: }
  603: 
  604: /**
  605:  * sk_set_ttl - set transmit TTL for given socket
  606:  * @s: socket
  607:  * @ttl: TTL value
  608:  *
  609:  * Set TTL for already opened connections when TTL was not set before. Useful
  610:  * for accepted connections when different ones should have different TTL.
  611:  *
  612:  * Result: 0 for success, -1 for an error.
  613:  */
  614: 
  615: int
  616: sk_set_ttl(sock *s, int ttl)
  617: {
  618:   s->ttl = ttl;
  619: 
  620:   if (sk_is_ipv4(s))
  621:     return sk_set_ttl4(s, ttl);
  622:   else
  623:     return sk_set_ttl6(s, ttl);
  624: }
  625: 
  626: /**
  627:  * sk_set_min_ttl - set minimal accepted TTL for given socket
  628:  * @s: socket
  629:  * @ttl: TTL value
  630:  *
  631:  * Set minimal accepted TTL for given socket. Can be used for TTL security
  632:  * implementations.
  633:  *
  634:  * Result: 0 for success, -1 for an error.
  635:  */
  636: 
  637: int
  638: sk_set_min_ttl(sock *s, int ttl)
  639: {
  640:   if (sk_is_ipv4(s))
  641:     return sk_set_min_ttl4(s, ttl);
  642:   else
  643:     return sk_set_min_ttl6(s, ttl);
  644: }
  645: 
  646: #if 0
  647: /**
  648:  * sk_set_md5_auth - add / remove MD5 security association for given socket
  649:  * @s: socket
  650:  * @local: IP address of local side
  651:  * @remote: IP address of remote side
  652:  * @ifa: Interface for link-local IP address
  653:  * @passwd: Password used for MD5 authentication
  654:  * @setkey: Update also system SA/SP database
  655:  *
  656:  * In TCP MD5 handling code in kernel, there is a set of security associations
  657:  * used for choosing password and other authentication parameters according to
  658:  * the local and remote address. This function is useful for listening socket,
  659:  * for active sockets it may be enough to set s->password field.
  660:  *
  661:  * When called with passwd != NULL, the new pair is added,
  662:  * When called with passwd == NULL, the existing pair is removed.
  663:  *
  664:  * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are
  665:  * stored in global SA/SP database (but the behavior also must be enabled on
  666:  * per-socket basis). In case of multiple sockets to the same neighbor, the
  667:  * socket-specific state must be configured for each socket while global state
  668:  * just once per src-dst pair. The @setkey argument controls whether the global
  669:  * state (SA/SP database) is also updated.
  670:  *
  671:  * Result: 0 for success, -1 for an error.
  672:  */
  673: 
  674: int
  675: sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey)
  676: { DUMMY; }
  677: #endif
  678: 
  679: /**
  680:  * sk_set_ipv6_checksum - specify IPv6 checksum offset for given socket
  681:  * @s: socket
  682:  * @offset: offset
  683:  *
  684:  * Specify IPv6 checksum field offset for given raw IPv6 socket. After that, the
  685:  * kernel will automatically fill it for outgoing packets and check it for
  686:  * incoming packets. Should not be used on ICMPv6 sockets, where the position is
  687:  * known to the kernel.
  688:  *
  689:  * Result: 0 for success, -1 for an error.
  690:  */
  691: 
  692: int
  693: sk_set_ipv6_checksum(sock *s, int offset)
  694: {
  695:   if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0)
  696:     ERR("IPV6_CHECKSUM");
  697: 
  698:   return 0;
  699: }
  700: 
  701: int
  702: sk_set_icmp6_filter(sock *s, int p1, int p2)
  703: {
  704:   /* a bit of lame interface, but it is here only for Radv */
  705:   struct icmp6_filter f;
  706: 
  707:   ICMP6_FILTER_SETBLOCKALL(&f);
  708:   ICMP6_FILTER_SETPASS(p1, &f);
  709:   ICMP6_FILTER_SETPASS(p2, &f);
  710: 
  711:   if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0)
  712:     ERR("ICMP6_FILTER");
  713: 
  714:   return 0;
  715: }
  716: 
  717: void
  718: sk_log_error(sock *s, const char *p)
  719: {
  720:   log(L_ERR "%s: Socket error: %s%#m", p, s->err);
  721: }
  722: 
  723: 
  724: /*
  725:  *	Actual struct birdsock code
  726:  */
  727: 
  728: static list sock_list;
  729: static struct birdsock *current_sock;
  730: static struct birdsock *stored_sock;
  731: 
  732: static inline sock *
  733: sk_next(sock *s)
  734: {
  735:   if (!s->n.next->next)
  736:     return NULL;
  737:   else
  738:     return SKIP_BACK(sock, n, s->n.next);
  739: }
  740: 
  741: static void
  742: sk_alloc_bufs(sock *s)
  743: {
  744:   if (!s->rbuf && s->rbsize)
  745:     s->rbuf = s->rbuf_alloc = xmalloc(s->rbsize);
  746:   s->rpos = s->rbuf;
  747:   if (!s->tbuf && s->tbsize)
  748:     s->tbuf = s->tbuf_alloc = xmalloc(s->tbsize);
  749:   s->tpos = s->ttx = s->tbuf;
  750: }
  751: 
  752: static void
  753: sk_free_bufs(sock *s)
  754: {
  755:   if (s->rbuf_alloc)
  756:   {
  757:     xfree(s->rbuf_alloc);
  758:     s->rbuf = s->rbuf_alloc = NULL;
  759:   }
  760:   if (s->tbuf_alloc)
  761:   {
  762:     xfree(s->tbuf_alloc);
  763:     s->tbuf = s->tbuf_alloc = NULL;
  764:   }
  765: }
  766: 
  767: #ifdef HAVE_LIBSSH
  768: static void
  769: sk_ssh_free(sock *s)
  770: {
  771:   struct ssh_sock *ssh = s->ssh;
  772: 
  773:   if (s->ssh == NULL)
  774:     return;
  775: 
  776:   s->ssh = NULL;
  777: 
  778:   if (ssh->channel)
  779:   {
  780:     if (ssh_channel_is_open(ssh->channel))
  781:       ssh_channel_close(ssh->channel);
  782:     ssh_channel_free(ssh->channel);
  783:     ssh->channel = NULL;
  784:   }
  785: 
  786:   if (ssh->session)
  787:   {
  788:     ssh_disconnect(ssh->session);
  789:     ssh_free(ssh->session);
  790:     ssh->session = NULL;
  791:   }
  792: }
  793: #endif
  794: 
  795: static void
  796: sk_free(resource *r)
  797: {
  798:   sock *s = (sock *) r;
  799: 
  800:   sk_free_bufs(s);
  801: 
  802: #ifdef HAVE_LIBSSH
  803:   if (s->type == SK_SSH || s->type == SK_SSH_ACTIVE)
  804:     sk_ssh_free(s);
  805: #endif
  806: 
  807:   if (s->fd < 0)
  808:     return;
  809: 
  810:   /* FIXME: we should call sk_stop() for SKF_THREAD sockets */
  811:   if (!(s->flags & SKF_THREAD))
  812:   {
  813:     if (s == current_sock)
  814:       current_sock = sk_next(s);
  815:     if (s == stored_sock)
  816:       stored_sock = sk_next(s);
  817:     rem_node(&s->n);
  818:   }
  819: 
  820:   if (s->type != SK_SSH && s->type != SK_SSH_ACTIVE)
  821:     close(s->fd);
  822: 
  823:   s->fd = -1;
  824: }
  825: 
  826: void
  827: sk_set_rbsize(sock *s, uint val)
  828: {
  829:   ASSERT(s->rbuf_alloc == s->rbuf);
  830: 
  831:   if (s->rbsize == val)
  832:     return;
  833: 
  834:   s->rbsize = val;
  835:   xfree(s->rbuf_alloc);
  836:   s->rbuf_alloc = xmalloc(val);
  837:   s->rpos = s->rbuf = s->rbuf_alloc;
  838: }
  839: 
  840: void
  841: sk_set_tbsize(sock *s, uint val)
  842: {
  843:   ASSERT(s->tbuf_alloc == s->tbuf);
  844: 
  845:   if (s->tbsize == val)
  846:     return;
  847: 
  848:   byte *old_tbuf = s->tbuf;
  849: 
  850:   s->tbsize = val;
  851:   s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val);
  852:   s->tpos = s->tbuf + (s->tpos - old_tbuf);
  853:   s->ttx  = s->tbuf + (s->ttx  - old_tbuf);
  854: }
  855: 
  856: void
  857: sk_set_tbuf(sock *s, void *tbuf)
  858: {
  859:   s->tbuf = tbuf ?: s->tbuf_alloc;
  860:   s->ttx = s->tpos = s->tbuf;
  861: }
  862: 
/*
 * sk_reallocate - drop and re-create the socket buffers
 *
 * Frees the buffers owned by the socket and allocates them again according
 * to the currently configured sizes; buffer cursors are reset.
 */
void
sk_reallocate(sock *s)
{
  sk_free_bufs(s);
  sk_alloc_bufs(s);
}
  869: 
  870: static void
  871: sk_dump(resource *r)
  872: {
  873:   sock *s = (sock *) r;
  874:   static char *sk_type_names[] = { "TCP<", "TCP>", "TCP", "UDP", NULL, "IP", NULL, "MAGIC", "UNIX<", "UNIX", "SSH>", "SSH", "DEL!" };
  875: 
  876:   debug("(%s, ud=%p, sa=%I, sp=%d, da=%I, dp=%d, tos=%d, ttl=%d, if=%s)\n",
  877: 	sk_type_names[s->type],
  878: 	s->data,
  879: 	s->saddr,
  880: 	s->sport,
  881: 	s->daddr,
  882: 	s->dport,
  883: 	s->tos,
  884: 	s->ttl,
  885: 	s->iface ? s->iface->name : "none");
  886: }
  887: 
/*
 * Resource class descriptor for sockets. The initializer is positional;
 * NOTE(review): the meaning of the two trailing NULL slots is defined by
 * struct resclass, which is declared outside this file section.
 */
static struct resclass sk_class = {
  "Socket",			/* Class name */
  sizeof(sock),			/* Size of one instance */
  sk_free,			/* Releases buffers, list membership and the fd */
  sk_dump,			/* Prints a one-line description */
  NULL,
  NULL
};
  896: 
  897: /**
  898:  * sk_new - create a socket
  899:  * @p: pool
  900:  *
  901:  * This function creates a new socket resource. If you want to use it,
  902:  * you need to fill in all the required fields of the structure and
  903:  * call sk_open() to do the actual opening of the socket.
  904:  *
  905:  * The real function name is sock_new(), sk_new() is a macro wrapper
  906:  * to avoid collision with OpenSSL.
  907:  */
  908: sock *
  909: sock_new(pool *p)
  910: {
  911:   sock *s = ralloc(p, &sk_class);
  912:   s->pool = p;
  913:   // s->saddr = s->daddr = IPA_NONE;
  914:   s->tos = s->priority = s->ttl = -1;
  915:   s->fd = -1;
  916:   return s;
  917: }
  918: 
/*
 * Apply the options described by the fields of @s to its already existing
 * file descriptor. Returns 0 on success and -1 as soon as one of the helper
 * calls reports failure.
 *
 * NOTE(review): ERR() is defined outside this file section; it presumably
 * records the given option name in s->err and returns -1 -- confirm.
 */
static int
sk_setup(sock *s)
{
  int y = 1;
  int fd = s->fd;

  /* Nothing is configured on the fd of a still-connecting SSH socket */
  if (s->type == SK_SSH_ACTIVE)
    return 0;

  if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
    ERR("O_NONBLOCK");

  /* Sockets without an address family get no IP-level options */
  if (!s->af)
    return 0;

  /* A source address that is not enforced by bind is marked SKF_PKTINFO */
  if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND))
    s->flags |= SKF_PKTINFO;

#ifdef CONFIG_USE_HDRINCL
  /* For raw IPv4 sockets, replace SKF_PKTINFO by SKF_HDRINCL */
  if (sk_is_ipv4(s) && (s->type == SK_IP) && (s->flags & SKF_PKTINFO))
  {
    s->flags &= ~SKF_PKTINFO;
    s->flags |= SKF_HDRINCL;
    if (setsockopt(fd, SOL_IP, IP_HDRINCL, &y, sizeof(y)) < 0)
      ERR("IP_HDRINCL");
  }
#endif

  if (s->vrf && !s->iface)
  {
    /* Bind socket to associated VRF interface.
       This is Linux-specific, but so is SO_BINDTODEVICE. */
#ifdef SO_BINDTODEVICE
    struct ifreq ifr = {};
    strcpy(ifr.ifr_name, s->vrf->name);
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
      ERR("SO_BINDTODEVICE");
#endif
  }

  if (s->iface)
  {
    /* Bind the socket to its own interface when the option is available */
#ifdef SO_BINDTODEVICE
    struct ifreq ifr = {};
    strcpy(ifr.ifr_name, s->iface->name);
    if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0)
      ERR("SO_BINDTODEVICE");
#endif

#ifdef CONFIG_UNIX_DONTROUTE
    if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &y, sizeof(y)) < 0)
      ERR("SO_DONTROUTE");
#endif
  }

  /* IPv4-specific options */
  if (sk_is_ipv4(s))
  {
    if (s->flags & SKF_LADDR_RX)
      if (sk_request_cmsg4_pktinfo(s) < 0)
	return -1;

    if (s->flags & SKF_TTL_RX)
      if (sk_request_cmsg4_ttl(s) < 0)
	return -1;

    if ((s->type == SK_UDP) || (s->type == SK_IP))
      if (sk_disable_mtu_disc4(s) < 0)
	return -1;

    /* Negative TTL / ToS mean "leave the system default" */
    if (s->ttl >= 0)
      if (sk_set_ttl4(s, s->ttl) < 0)
	return -1;

    if (s->tos >= 0)
      if (sk_set_tos4(s, s->tos) < 0)
	return -1;
  }

  /* IPv6-specific options */
  if (sk_is_ipv6(s))
  {
    /* TCP and UDP IPv6 sockets are restricted to IPv6 traffic only */
    if ((s->type == SK_TCP_PASSIVE) || (s->type == SK_TCP_ACTIVE) || (s->type == SK_UDP))
      if (setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &y, sizeof(y)) < 0)
	ERR("IPV6_V6ONLY");

    if (s->flags & SKF_LADDR_RX)
      if (sk_request_cmsg6_pktinfo(s) < 0)
	return -1;

    if (s->flags & SKF_TTL_RX)
      if (sk_request_cmsg6_ttl(s) < 0)
	return -1;

    if ((s->type == SK_UDP) || (s->type == SK_IP))
      if (sk_disable_mtu_disc6(s) < 0)
	return -1;

    if (s->ttl >= 0)
      if (sk_set_ttl6(s, s->ttl) < 0)
	return -1;

    if (s->tos >= 0)
      if (sk_set_tos6(s, s->tos) < 0)
	return -1;
  }

  /* Must be after sk_set_tos4() as setting ToS on Linux also mangles priority */
  if (s->priority >= 0)
    if (sk_set_priority(s, s->priority) < 0)
      return -1;

  return 0;
}
 1031: 
/* Append socket @s to the tail of the file-local socket list. */
static void
sk_insert(sock *s)
{
  add_tail(&sock_list, &s->n);
}
 1037: 
 1038: static void
 1039: sk_tcp_connected(sock *s)
 1040: {
 1041:   sockaddr sa;
 1042:   int sa_len = sizeof(sa);
 1043: 
 1044:   if ((getsockname(s->fd, &sa.sa, &sa_len) < 0) ||
 1045:       (sockaddr_read(&sa, s->af, &s->saddr, &s->iface, &s->sport) < 0))
 1046:     log(L_WARN "SOCK: Cannot get local IP address for TCP>");
 1047: 
 1048:   s->type = SK_TCP;
 1049:   sk_alloc_bufs(s);
 1050:   s->tx_hook(s);
 1051: }
 1052: 
#ifdef HAVE_LIBSSH
/*
 * Finish an outgoing SSH connection: allocate the socket buffers, switch
 * the socket type to SK_SSH and call the tx hook.
 */
static void
sk_ssh_connected(sock *s)
{
  sk_alloc_bufs(s);
  s->type = SK_SSH;
  s->tx_hook(s);
}
#endif
 1062: 
 1063: static int
 1064: sk_passive_connected(sock *s, int type)
 1065: {
 1066:   sockaddr loc_sa, rem_sa;
 1067:   int loc_sa_len = sizeof(loc_sa);
 1068:   int rem_sa_len = sizeof(rem_sa);
 1069: 
 1070:   int fd = accept(s->fd, ((type == SK_TCP) ? &rem_sa.sa : NULL), &rem_sa_len);
 1071:   if (fd < 0)
 1072:   {
 1073:     if ((errno != EINTR) && (errno != EAGAIN))
 1074:       s->err_hook(s, errno);
 1075:     return 0;
 1076:   }
 1077: 
 1078:   sock *t = sk_new(s->pool);
 1079:   t->type = type;
 1080:   t->data = s->data;
 1081:   t->af = s->af;
 1082:   t->fd = fd;
 1083:   t->ttl = s->ttl;
 1084:   t->tos = s->tos;
 1085:   t->vrf = s->vrf;
 1086:   t->rbsize = s->rbsize;
 1087:   t->tbsize = s->tbsize;
 1088: 
 1089:   if (type == SK_TCP)
 1090:   {
 1091:     if ((getsockname(fd, &loc_sa.sa, &loc_sa_len) < 0) ||
 1092: 	(sockaddr_read(&loc_sa, s->af, &t->saddr, &t->iface, &t->sport) < 0))
 1093:       log(L_WARN "SOCK: Cannot get local IP address for TCP<");
 1094: 
 1095:     if (sockaddr_read(&rem_sa, s->af, &t->daddr, &t->iface, &t->dport) < 0)
 1096:       log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
 1097:   }
 1098: 
 1099:   if (sk_setup(t) < 0)
 1100:   {
 1101:     /* FIXME: Call err_hook instead ? */
 1102:     log(L_ERR "SOCK: Incoming connection: %s%#m", t->err);
 1103: 
 1104:     /* FIXME: handle it better in rfree() */
 1105:     close(t->fd);
 1106:     t->fd = -1;
 1107:     rfree(t);
 1108:     return 1;
 1109:   }
 1110: 
 1111:   sk_insert(t);
 1112:   sk_alloc_bufs(t);
 1113:   s->rx_hook(t, 0);
 1114:   return 1;
 1115: }
 1116: 
 1117: #ifdef HAVE_LIBSSH
 1118: /*
 1119:  * Return SSH_OK or SSH_AGAIN or SSH_ERROR
 1120:  */
 1121: static int
 1122: sk_ssh_connect(sock *s)
 1123: {
 1124:   s->fd = ssh_get_fd(s->ssh->session);
 1125: 
 1126:   /* Big fall thru automata */
 1127:   switch (s->ssh->state)
 1128:   {
 1129:   case SK_SSH_CONNECT:
 1130:   {
 1131:     switch (ssh_connect(s->ssh->session))
 1132:     {
 1133:     case SSH_AGAIN:
 1134:       /* A quick look into libSSH shows that ssh_get_fd() should return non-(-1)
 1135:        * after SSH_AGAIN is returned by ssh_connect(). This is however nowhere
 1136:        * documented but our code relies on that.
 1137:        */
 1138:       return SSH_AGAIN;
 1139: 
 1140:     case SSH_OK:
 1141:       break;
 1142: 
 1143:     default:
 1144:       return SSH_ERROR;
 1145:     }
 1146:   } /* fallthrough */
 1147: 
 1148:   case SK_SSH_SERVER_KNOWN:
 1149:   {
 1150:     s->ssh->state = SK_SSH_SERVER_KNOWN;
 1151: 
 1152:     if (s->ssh->server_hostkey_path)
 1153:     {
 1154:       int server_identity_is_ok = 1;
 1155: 
 1156:       /* Check server identity */
 1157:       switch (ssh_is_server_known(s->ssh->session))
 1158:       {
 1159: #define LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s,msg,args...) log(L_WARN "SSH Identity %s@%s:%u: " msg, (s)->ssh->username, (s)->host, (s)->dport, ## args);
 1160:       case SSH_SERVER_KNOWN_OK:
 1161: 	/* The server is known and has not changed. */
 1162: 	break;
 1163: 
 1164:       case SSH_SERVER_NOT_KNOWN:
 1165: 	LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server is unknown, its public key was not found in the known host file %s", s->ssh->server_hostkey_path);
 1166: 	break;
 1167: 
 1168:       case SSH_SERVER_KNOWN_CHANGED:
 1169: 	LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server key has changed. Either you are under attack or the administrator changed the key.");
 1170: 	server_identity_is_ok = 0;
 1171: 	break;
 1172: 
 1173:       case SSH_SERVER_FILE_NOT_FOUND:
 1174: 	LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The known host file %s does not exist", s->ssh->server_hostkey_path);
 1175: 	server_identity_is_ok = 0;
 1176: 	break;
 1177: 
 1178:       case SSH_SERVER_ERROR:
 1179: 	LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "Some error happened");
 1180: 	server_identity_is_ok = 0;
 1181: 	break;
 1182: 
 1183:       case SSH_SERVER_FOUND_OTHER:
 1184: 	LOG_WARN_ABOUT_SSH_SERVER_VALIDATION(s, "The server gave use a key of a type while we had an other type recorded. " \
 1185: 					     "It is a possible attack.");
 1186: 	server_identity_is_ok = 0;
 1187: 	break;
 1188:       }
 1189: 
 1190:       if (!server_identity_is_ok)
 1191: 	return SSH_ERROR;
 1192:     }
 1193:   } /* fallthrough */
 1194: 
 1195:   case SK_SSH_USERAUTH:
 1196:   {
 1197:     s->ssh->state = SK_SSH_USERAUTH;
 1198:     switch (ssh_userauth_publickey_auto(s->ssh->session, NULL, NULL))
 1199:     {
 1200:     case SSH_AUTH_AGAIN:
 1201:       return SSH_AGAIN;
 1202: 
 1203:     case SSH_AUTH_SUCCESS:
 1204:       break;
 1205: 
 1206:     default:
 1207:       return SSH_ERROR;
 1208:     }
 1209:   } /* fallthrough */
 1210: 
 1211:   case SK_SSH_CHANNEL:
 1212:   {
 1213:     s->ssh->state = SK_SSH_CHANNEL;
 1214:     s->ssh->channel = ssh_channel_new(s->ssh->session);
 1215:     if (s->ssh->channel == NULL)
 1216:       return SSH_ERROR;
 1217:   } /* fallthrough */
 1218: 
 1219:   case SK_SSH_SESSION:
 1220:   {
 1221:     s->ssh->state = SK_SSH_SESSION;
 1222:     switch (ssh_channel_open_session(s->ssh->channel))
 1223:     {
 1224:     case SSH_AGAIN:
 1225:       return SSH_AGAIN;
 1226: 
 1227:     case SSH_OK:
 1228:       break;
 1229: 
 1230:     default:
 1231:       return SSH_ERROR;
 1232:     }
 1233:   } /* fallthrough */
 1234: 
 1235:   case SK_SSH_SUBSYSTEM:
 1236:   {
 1237:     s->ssh->state = SK_SSH_SUBSYSTEM;
 1238:     if (s->ssh->subsystem)
 1239:     {
 1240:       switch (ssh_channel_request_subsystem(s->ssh->channel, s->ssh->subsystem))
 1241:       {
 1242:       case SSH_AGAIN:
 1243: 	return SSH_AGAIN;
 1244: 
 1245:       case SSH_OK:
 1246: 	break;
 1247: 
 1248:       default:
 1249: 	return SSH_ERROR;
 1250:       }
 1251:     }
 1252:   } /* fallthrough */
 1253: 
 1254:   case SK_SSH_ESTABLISHED:
 1255:     s->ssh->state = SK_SSH_ESTABLISHED;
 1256:   }
 1257: 
 1258:   return SSH_OK;
 1259: }
 1260: 
 1261: /*
 1262:  * Return file descriptor number if success
 1263:  * Return -1 if failed
 1264:  */
 1265: static int
 1266: sk_open_ssh(sock *s)
 1267: {
 1268:   if (!s->ssh)
 1269:     bug("sk_open() sock->ssh is not allocated");
 1270: 
 1271:   ssh_session sess = ssh_new();
 1272:   if (sess == NULL)
 1273:     ERR2("Cannot create a ssh session");
 1274:   s->ssh->session = sess;
 1275: 
 1276:   const int verbosity = SSH_LOG_NOLOG;
 1277:   ssh_options_set(sess, SSH_OPTIONS_LOG_VERBOSITY, &verbosity);
 1278:   ssh_options_set(sess, SSH_OPTIONS_HOST, s->host);
 1279:   ssh_options_set(sess, SSH_OPTIONS_PORT, &(s->dport));
 1280:   /* TODO: Add SSH_OPTIONS_BINDADDR */
 1281:   ssh_options_set(sess, SSH_OPTIONS_USER, s->ssh->username);
 1282: 
 1283:   if (s->ssh->server_hostkey_path)
 1284:     ssh_options_set(sess, SSH_OPTIONS_KNOWNHOSTS, s->ssh->server_hostkey_path);
 1285: 
 1286:   if (s->ssh->client_privkey_path)
 1287:     ssh_options_set(sess, SSH_OPTIONS_IDENTITY, s->ssh->client_privkey_path);
 1288: 
 1289:   ssh_set_blocking(sess, 0);
 1290: 
 1291:   switch (sk_ssh_connect(s))
 1292:   {
 1293:   case SSH_AGAIN:
 1294:     break;
 1295: 
 1296:   case SSH_OK:
 1297:     sk_ssh_connected(s);
 1298:     break;
 1299: 
 1300:   case SSH_ERROR:
 1301:     ERR2(ssh_get_error(sess));
 1302:     break;
 1303:   }
 1304: 
 1305:   return ssh_get_fd(sess);
 1306: 
 1307:  err:
 1308:   return -1;
 1309: }
 1310: #endif
 1311: 
 1312: /**
 1313:  * sk_open - open a socket
 1314:  * @s: socket
 1315:  *
 1316:  * This function takes a socket resource created by sk_new() and
 1317:  * initialized by the user and binds a corresponding network connection
 1318:  * to it.
 1319:  *
 1320:  * Result: 0 for success, -1 for an error.
 1321:  */
 1322: int
 1323: sk_open(sock *s)
 1324: {
 1325:   int af = AF_UNSPEC;
 1326:   int fd = -1;
 1327:   int do_bind = 0;
 1328:   int bind_port = 0;
 1329:   ip_addr bind_addr = IPA_NONE;
 1330:   sockaddr sa;
 1331: 
 1332:   if (s->type <= SK_IP)
 1333:   {
 1334:     /*
 1335:      * For TCP/IP sockets, Address family (IPv4 or IPv6) can be specified either
 1336:      * explicitly (SK_IPV4 or SK_IPV6) or implicitly (based on saddr, daddr).
 1337:      * But the specifications have to be consistent.
 1338:      */
 1339: 
 1340:     switch (s->subtype)
 1341:     {
 1342:     case 0:
 1343:       ASSERT(ipa_zero(s->saddr) || ipa_zero(s->daddr) ||
 1344: 	     (ipa_is_ip4(s->saddr) == ipa_is_ip4(s->daddr)));
 1345:       af = (ipa_is_ip4(s->saddr) || ipa_is_ip4(s->daddr)) ? AF_INET : AF_INET6;
 1346:       break;
 1347: 
 1348:     case SK_IPV4:
 1349:       ASSERT(ipa_zero(s->saddr) || ipa_is_ip4(s->saddr));
 1350:       ASSERT(ipa_zero(s->daddr) || ipa_is_ip4(s->daddr));
 1351:       af = AF_INET;
 1352:       break;
 1353: 
 1354:     case SK_IPV6:
 1355:       ASSERT(ipa_zero(s->saddr) || !ipa_is_ip4(s->saddr));
 1356:       ASSERT(ipa_zero(s->daddr) || !ipa_is_ip4(s->daddr));
 1357:       af = AF_INET6;
 1358:       break;
 1359: 
 1360:     default:
 1361:       bug("Invalid subtype %d", s->subtype);
 1362:     }
 1363:   }
 1364: 
 1365:   switch (s->type)
 1366:   {
 1367:   case SK_TCP_ACTIVE:
 1368:     s->ttx = "";			/* Force s->ttx != s->tpos */
 1369:     /* Fall thru */
 1370:   case SK_TCP_PASSIVE:
 1371:     fd = socket(af, SOCK_STREAM, IPPROTO_TCP);
 1372:     bind_port = s->sport;
 1373:     bind_addr = s->saddr;
 1374:     do_bind = bind_port || ipa_nonzero(bind_addr);
 1375:     break;
 1376: 
 1377: #ifdef HAVE_LIBSSH
 1378:   case SK_SSH_ACTIVE:
 1379:     s->ttx = "";			/* Force s->ttx != s->tpos */
 1380:     fd = sk_open_ssh(s);
 1381:     break;
 1382: #endif
 1383: 
 1384:   case SK_UDP:
 1385:     fd = socket(af, SOCK_DGRAM, IPPROTO_UDP);
 1386:     bind_port = s->sport;
 1387:     bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
 1388:     do_bind = 1;
 1389:     break;
 1390: 
 1391:   case SK_IP:
 1392:     fd = socket(af, SOCK_RAW, s->dport);
 1393:     bind_port = 0;
 1394:     bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE;
 1395:     do_bind = ipa_nonzero(bind_addr);
 1396:     break;
 1397: 
 1398:   case SK_MAGIC:
 1399:     af = 0;
 1400:     fd = s->fd;
 1401:     break;
 1402: 
 1403:   default:
 1404:     bug("sk_open() called for invalid sock type %d", s->type);
 1405:   }
 1406: 
 1407:   if (fd < 0)
 1408:     ERR("socket");
 1409: 
 1410:   s->af = af;
 1411:   s->fd = fd;
 1412: 
 1413:   if (sk_setup(s) < 0)
 1414:     goto err;
 1415: 
 1416:   if (do_bind)
 1417:   {
 1418:     if (bind_port)
 1419:     {
 1420:       int y = 1;
 1421: 
 1422:       if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &y, sizeof(y)) < 0)
 1423: 	ERR2("SO_REUSEADDR");
 1424: 
 1425: #ifdef CONFIG_NO_IFACE_BIND
 1426:       /* Workaround missing ability to bind to an iface */
 1427:       if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr))
 1428:       {
 1429: 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &y, sizeof(y)) < 0)
 1430: 	  ERR2("SO_REUSEPORT");
 1431:       }
 1432: #endif
 1433:     }
 1434:     else
 1435:       if (s->flags & SKF_HIGH_PORT)
 1436: 	if (sk_set_high_port(s) < 0)
 1437: 	  log(L_WARN "Socket error: %s%#m", s->err);
 1438: 
 1439:     sockaddr_fill(&sa, s->af, bind_addr, s->iface, bind_port);
 1440:     if (bind(fd, &sa.sa, SA_LEN(sa)) < 0)
 1441:       ERR2("bind");
 1442:   }
 1443: 
 1444:   if (s->password)
 1445:     if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0)
 1446:       goto err;
 1447: 
 1448:   switch (s->type)
 1449:   {
 1450:   case SK_TCP_ACTIVE:
 1451:     sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
 1452:     if (connect(fd, &sa.sa, SA_LEN(sa)) >= 0)
 1453:       sk_tcp_connected(s);
 1454:     else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS &&
 1455: 	     errno != ECONNREFUSED && errno != EHOSTUNREACH && errno != ENETUNREACH)
 1456:       ERR2("connect");
 1457:     break;
 1458: 
 1459:   case SK_TCP_PASSIVE:
 1460:     if (listen(fd, 8) < 0)
 1461:       ERR2("listen");
 1462:     break;
 1463: 
 1464:   case SK_SSH_ACTIVE:
 1465:   case SK_MAGIC:
 1466:     break;
 1467: 
 1468:   default:
 1469:     sk_alloc_bufs(s);
 1470:   }
 1471: 
 1472:   if (!(s->flags & SKF_THREAD))
 1473:     sk_insert(s);
 1474: 
 1475:   return 0;
 1476: 
 1477: err:
 1478:   close(fd);
 1479:   s->fd = -1;
 1480:   return -1;
 1481: }
 1482: 
 1483: int
 1484: sk_open_unix(sock *s, char *name)
 1485: {
 1486:   struct sockaddr_un sa;
 1487:   int fd;
 1488: 
 1489:   /* We are sloppy during error (leak fd and not set s->err), but we die anyway */
 1490: 
 1491:   fd = socket(AF_UNIX, SOCK_STREAM, 0);
 1492:   if (fd < 0)
 1493:     return -1;
 1494: 
 1495:   if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0)
 1496:     return -1;
 1497: 
 1498:   /* Path length checked in test_old_bird() */
 1499:   sa.sun_family = AF_UNIX;
 1500:   strcpy(sa.sun_path, name);
 1501: 
 1502:   if (bind(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0)
 1503:     return -1;
 1504: 
 1505:   if (listen(fd, 8) < 0)
 1506:     return -1;
 1507: 
 1508:   s->fd = fd;
 1509:   sk_insert(s);
 1510:   return 0;
 1511: }
 1512: 
 1513: 
 1514: #define CMSG_RX_SPACE MAX(CMSG4_SPACE_PKTINFO+CMSG4_SPACE_TTL, \
 1515: 			  CMSG6_SPACE_PKTINFO+CMSG6_SPACE_TTL)
 1516: #define CMSG_TX_SPACE MAX(CMSG4_SPACE_PKTINFO,CMSG6_SPACE_PKTINFO)
 1517: 
 1518: static void
 1519: sk_prepare_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen)
 1520: {
 1521:   if (sk_is_ipv4(s))
 1522:     sk_prepare_cmsgs4(s, msg, cbuf, cbuflen);
 1523:   else
 1524:     sk_prepare_cmsgs6(s, msg, cbuf, cbuflen);
 1525: }
 1526: 
 1527: static void
 1528: sk_process_cmsgs(sock *s, struct msghdr *msg)
 1529: {
 1530:   struct cmsghdr *cm;
 1531: 
 1532:   s->laddr = IPA_NONE;
 1533:   s->lifindex = 0;
 1534:   s->rcv_ttl = -1;
 1535: 
 1536:   for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm))
 1537:   {
 1538:     if ((cm->cmsg_level == SOL_IP) && sk_is_ipv4(s))
 1539:     {
 1540:       sk_process_cmsg4_pktinfo(s, cm);
 1541:       sk_process_cmsg4_ttl(s, cm);
 1542:     }
 1543: 
 1544:     if ((cm->cmsg_level == SOL_IPV6) && sk_is_ipv6(s))
 1545:     {
 1546:       sk_process_cmsg6_pktinfo(s, cm);
 1547:       sk_process_cmsg6_ttl(s, cm);
 1548:     }
 1549:   }
 1550: }
 1551: 
 1552: 
 1553: static inline int
 1554: sk_sendmsg(sock *s)
 1555: {
 1556:   struct iovec iov = {s->tbuf, s->tpos - s->tbuf};
 1557:   byte cmsg_buf[CMSG_TX_SPACE];
 1558:   sockaddr dst;
 1559:   int flags = 0;
 1560: 
 1561:   sockaddr_fill(&dst, s->af, s->daddr, s->iface, s->dport);
 1562: 
 1563:   struct msghdr msg = {
 1564:     .msg_name = &dst.sa,
 1565:     .msg_namelen = SA_LEN(dst),
 1566:     .msg_iov = &iov,
 1567:     .msg_iovlen = 1
 1568:   };
 1569: 
 1570: #ifdef CONFIG_DONTROUTE_UNICAST
 1571:   /* FreeBSD silently changes TTL to 1 when MSG_DONTROUTE is used, therefore we
 1572:      cannot use it for other cases (e.g. when TTL security is used). */
 1573:   if (ipa_is_ip4(s->daddr) && ip4_is_unicast(ipa_to_ip4(s->daddr)) && (s->ttl == 1))
 1574:     flags = MSG_DONTROUTE;
 1575: #endif
 1576: 
 1577: #ifdef CONFIG_USE_HDRINCL
 1578:   byte hdr[20];
 1579:   struct iovec iov2[2] = { {hdr, 20}, iov };
 1580: 
 1581:   if (s->flags & SKF_HDRINCL)
 1582:   {
 1583:     sk_prepare_ip_header(s, hdr, iov.iov_len);
 1584:     msg.msg_iov = iov2;
 1585:     msg.msg_iovlen = 2;
 1586:   }
 1587: #endif
 1588: 
 1589:   if (s->flags & SKF_PKTINFO)
 1590:     sk_prepare_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf));
 1591: 
 1592:   return sendmsg(s->fd, &msg, flags);
 1593: }
 1594: 
 1595: static inline int
 1596: sk_recvmsg(sock *s)
 1597: {
 1598:   struct iovec iov = {s->rbuf, s->rbsize};
 1599:   byte cmsg_buf[CMSG_RX_SPACE];
 1600:   sockaddr src;
 1601: 
 1602:   struct msghdr msg = {
 1603:     .msg_name = &src.sa,
 1604:     .msg_namelen = sizeof(src), // XXXX ??
 1605:     .msg_iov = &iov,
 1606:     .msg_iovlen = 1,
 1607:     .msg_control = cmsg_buf,
 1608:     .msg_controllen = sizeof(cmsg_buf),
 1609:     .msg_flags = 0
 1610:   };
 1611: 
 1612:   int rv = recvmsg(s->fd, &msg, 0);
 1613:   if (rv < 0)
 1614:     return rv;
 1615: 
 1616:   //ifdef IPV4
 1617:   //  if (cf_type == SK_IP)
 1618:   //    rv = ipv4_skip_header(pbuf, rv);
 1619:   //endif
 1620: 
 1621:   sockaddr_read(&src, s->af, &s->faddr, NULL, &s->fport);
 1622:   sk_process_cmsgs(s, &msg);
 1623: 
 1624:   if (msg.msg_flags & MSG_TRUNC)
 1625:     s->flags |= SKF_TRUNCATED;
 1626:   else
 1627:     s->flags &= ~SKF_TRUNCATED;
 1628: 
 1629:   return rv;
 1630: }
 1631: 
 1632: 
 1633: static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; }
 1634: 
 1635: static int
 1636: sk_maybe_write(sock *s)
 1637: {
 1638:   int e;
 1639: 
 1640:   switch (s->type)
 1641:   {
 1642:   case SK_TCP:
 1643:   case SK_MAGIC:
 1644:   case SK_UNIX:
 1645:     while (s->ttx != s->tpos)
 1646:     {
 1647:       e = write(s->fd, s->ttx, s->tpos - s->ttx);
 1648: 
 1649:       if (e < 0)
 1650:       {
 1651: 	if (errno != EINTR && errno != EAGAIN)
 1652: 	{
 1653: 	  reset_tx_buffer(s);
 1654: 	  /* EPIPE is just a connection close notification during TX */
 1655: 	  s->err_hook(s, (errno != EPIPE) ? errno : 0);
 1656: 	  return -1;
 1657: 	}
 1658: 	return 0;
 1659:       }
 1660:       s->ttx += e;
 1661:     }
 1662:     reset_tx_buffer(s);
 1663:     return 1;
 1664: 
 1665: #ifdef HAVE_LIBSSH
 1666:   case SK_SSH:
 1667:     while (s->ttx != s->tpos)
 1668:     {
 1669:       e = ssh_channel_write(s->ssh->channel, s->ttx, s->tpos - s->ttx);
 1670: 
 1671:       if (e < 0)
 1672:       {
 1673: 	s->err = ssh_get_error(s->ssh->session);
 1674: 	s->err_hook(s, ssh_get_error_code(s->ssh->session));
 1675: 
 1676: 	reset_tx_buffer(s);
 1677: 	/* EPIPE is just a connection close notification during TX */
 1678: 	s->err_hook(s, (errno != EPIPE) ? errno : 0);
 1679: 	return -1;
 1680:       }
 1681:       s->ttx += e;
 1682:     }
 1683:     reset_tx_buffer(s);
 1684:     return 1;
 1685: #endif
 1686: 
 1687:   case SK_UDP:
 1688:   case SK_IP:
 1689:     {
 1690:       if (s->tbuf == s->tpos)
 1691: 	return 1;
 1692: 
 1693:       e = sk_sendmsg(s);
 1694: 
 1695:       if (e < 0)
 1696:       {
 1697: 	if (errno != EINTR && errno != EAGAIN)
 1698: 	{
 1699: 	  reset_tx_buffer(s);
 1700: 	  s->err_hook(s, errno);
 1701: 	  return -1;
 1702: 	}
 1703: 
 1704: 	if (!s->tx_hook)
 1705: 	  reset_tx_buffer(s);
 1706: 	return 0;
 1707:       }
 1708:       reset_tx_buffer(s);
 1709:       return 1;
 1710:     }
 1711: 
 1712:   default:
 1713:     bug("sk_maybe_write: unknown socket type %d", s->type);
 1714:   }
 1715: }
 1716: 
 1717: int
 1718: sk_rx_ready(sock *s)
 1719: {
 1720:   int rv;
 1721:   struct pollfd pfd = { .fd = s->fd };
 1722:   pfd.events |= POLLIN;
 1723: 
 1724:  redo:
 1725:   rv = poll(&pfd, 1, 0);
 1726: 
 1727:   if ((rv < 0) && (errno == EINTR || errno == EAGAIN))
 1728:     goto redo;
 1729: 
 1730:   return rv;
 1731: }
 1732: 
 1733: /**
 1734:  * sk_send - send data to a socket
 1735:  * @s: socket
 1736:  * @len: number of bytes to send
 1737:  *
 1738:  * This function sends @len bytes of data prepared in the
 1739:  * transmit buffer of the socket @s to the network connection.
 1740:  * If the packet can be sent immediately, it does so and returns
 1741:  * 1, else it queues the packet for later processing, returns 0
 1742:  * and calls the @tx_hook of the socket when the tranmission
 1743:  * takes place.
 1744:  */
 1745: int
 1746: sk_send(sock *s, unsigned len)
 1747: {
 1748:   s->ttx = s->tbuf;
 1749:   s->tpos = s->tbuf + len;
 1750:   return sk_maybe_write(s);
 1751: }
 1752: 
 1753: /**
 1754:  * sk_send_to - send data to a specific destination
 1755:  * @s: socket
 1756:  * @len: number of bytes to send
 1757:  * @addr: IP address to send the packet to
 1758:  * @port: port to send the packet to
 1759:  *
 1760:  * This is a sk_send() replacement for connection-less packet sockets
 1761:  * which allows destination of the packet to be chosen dynamically.
 1762:  * Raw IP sockets should use 0 for @port.
 1763:  */
 1764: int
 1765: sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port)
 1766: {
 1767:   s->daddr = addr;
 1768:   if (port)
 1769:     s->dport = port;
 1770: 
 1771:   s->ttx = s->tbuf;
 1772:   s->tpos = s->tbuf + len;
 1773:   return sk_maybe_write(s);
 1774: }
 1775: 
 1776: /*
 1777: int
 1778: sk_send_full(sock *s, unsigned len, struct iface *ifa,
 1779: 	     ip_addr saddr, ip_addr daddr, unsigned dport)
 1780: {
 1781:   s->iface = ifa;
 1782:   s->saddr = saddr;
 1783:   s->daddr = daddr;
 1784:   s->dport = dport;
 1785:   s->ttx = s->tbuf;
 1786:   s->tpos = s->tbuf + len;
 1787:   return sk_maybe_write(s);
 1788: }
 1789: */
 1790: 
 1791: static void
 1792: call_rx_hook(sock *s, int size)
 1793: {
 1794:   if (s->rx_hook(s, size))
 1795:   {
 1796:     /* We need to be careful since the socket could have been deleted by the hook */
 1797:     if (current_sock == s)
 1798:       s->rpos = s->rbuf;
 1799:   }
 1800: }
 1801: 
 1802: #ifdef HAVE_LIBSSH
 1803: static int
 1804: sk_read_ssh(sock *s)
 1805: {
 1806:   ssh_channel rchans[2] = { s->ssh->channel, NULL };
 1807:   struct timeval timev = { 1, 0 };
 1808: 
 1809:   if (ssh_channel_select(rchans, NULL, NULL, &timev) == SSH_EINTR)
 1810:     return 1; /* Try again */
 1811: 
 1812:   if (ssh_channel_is_eof(s->ssh->channel) != 0)
 1813:   {
 1814:     /* The remote side is closing the connection */
 1815:     s->err_hook(s, 0);
 1816:     return 0;
 1817:   }
 1818: 
 1819:   if (rchans[0] == NULL)
 1820:     return 0; /* No data is available on the socket */
 1821: 
 1822:   const uint used_bytes = s->rpos - s->rbuf;
 1823:   const int read_bytes = ssh_channel_read_nonblocking(s->ssh->channel, s->rpos, s->rbsize - used_bytes, 0);
 1824:   if (read_bytes > 0)
 1825:   {
 1826:     /* Received data */
 1827:     s->rpos += read_bytes;
 1828:     call_rx_hook(s, used_bytes + read_bytes);
 1829:     return 1;
 1830:   }
 1831:   else if (read_bytes == 0)
 1832:   {
 1833:     if (ssh_channel_is_eof(s->ssh->channel) != 0)
 1834:     {
 1835: 	/* The remote side is closing the connection */
 1836: 	s->err_hook(s, 0);
 1837:     }
 1838:   }
 1839:   else
 1840:   {
 1841:     s->err = ssh_get_error(s->ssh->session);
 1842:     s->err_hook(s, ssh_get_error_code(s->ssh->session));
 1843:   }
 1844: 
 1845:   return 0; /* No data is available on the socket */
 1846: }
 1847: #endif
 1848: 
 1849:  /* sk_read() and sk_write() are called from BFD's event loop */
 1850: 
 1851: int
 1852: sk_read(sock *s, int revents)
 1853: {
 1854:   switch (s->type)
 1855:   {
 1856:   case SK_TCP_PASSIVE:
 1857:     return sk_passive_connected(s, SK_TCP);
 1858: 
 1859:   case SK_UNIX_PASSIVE:
 1860:     return sk_passive_connected(s, SK_UNIX);
 1861: 
 1862:   case SK_TCP:
 1863:   case SK_UNIX:
 1864:     {
 1865:       int c = read(s->fd, s->rpos, s->rbuf + s->rbsize - s->rpos);
 1866: 
 1867:       if (c < 0)
 1868:       {
 1869: 	if (errno != EINTR && errno != EAGAIN)
 1870: 	  s->err_hook(s, errno);
 1871: 	else if (errno == EAGAIN && !(revents & POLLIN))
 1872: 	{
 1873: 	  log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents);
 1874: 	  s->err_hook(s, 0);
 1875: 	}
 1876:       }
 1877:       else if (!c)
 1878: 	s->err_hook(s, 0);
 1879:       else
 1880:       {
 1881: 	s->rpos += c;
 1882: 	call_rx_hook(s, s->rpos - s->rbuf);
 1883: 	return 1;
 1884:       }
 1885:       return 0;
 1886:     }
 1887: 
 1888: #ifdef HAVE_LIBSSH
 1889:   case SK_SSH:
 1890:     return sk_read_ssh(s);
 1891: #endif
 1892: 
 1893:   case SK_MAGIC:
 1894:     return s->rx_hook(s, 0);
 1895: 
 1896:   default:
 1897:     {
 1898:       int e = sk_recvmsg(s);
 1899: 
 1900:       if (e < 0)
 1901:       {
 1902: 	if (errno != EINTR && errno != EAGAIN)
 1903: 	  s->err_hook(s, errno);
 1904: 	return 0;
 1905:       }
 1906: 
 1907:       s->rpos = s->rbuf + e;
 1908:       s->rx_hook(s, e);
 1909:       return 1;
 1910:     }
 1911:   }
 1912: }
 1913: 
 1914: int
 1915: sk_write(sock *s)
 1916: {
 1917:   switch (s->type)
 1918:   {
 1919:   case SK_TCP_ACTIVE:
 1920:     {
 1921:       sockaddr sa;
 1922:       sockaddr_fill(&sa, s->af, s->daddr, s->iface, s->dport);
 1923: 
 1924:       if (connect(s->fd, &sa.sa, SA_LEN(sa)) >= 0 || errno == EISCONN)
 1925: 	sk_tcp_connected(s);
 1926:       else if (errno != EINTR && errno != EAGAIN && errno != EINPROGRESS)
 1927: 	s->err_hook(s, errno);
 1928:       return 0;
 1929:     }
 1930: 
 1931: #ifdef HAVE_LIBSSH
 1932:   case SK_SSH_ACTIVE:
 1933:     {
 1934:       switch (sk_ssh_connect(s))
 1935:       {
 1936: 	case SSH_OK:
 1937: 	  sk_ssh_connected(s);
 1938: 	  break;
 1939: 
 1940: 	case SSH_AGAIN:
 1941: 	  return 1;
 1942: 
 1943: 	case SSH_ERROR:
 1944: 	  s->err = ssh_get_error(s->ssh->session);
 1945: 	  s->err_hook(s, ssh_get_error_code(s->ssh->session));
 1946: 	  break;
 1947:       }
 1948:       return 0;
 1949:     }
 1950: #endif
 1951: 
 1952:   default:
 1953:     if (s->ttx != s->tpos && sk_maybe_write(s) > 0)
 1954:     {
 1955:       if (s->tx_hook)
 1956: 	s->tx_hook(s);
 1957:       return 1;
 1958:     }
 1959:     return 0;
 1960:   }
 1961: }
 1962: 
 1963: int sk_is_ipv4(sock *s)
 1964: { return s->af == AF_INET; }
 1965: 
 1966: int sk_is_ipv6(sock *s)
 1967: { return s->af == AF_INET6; }
 1968: 
 1969: void
 1970: sk_err(sock *s, int revents)
 1971: {
 1972:   int se = 0, sse = sizeof(se);
 1973:   if ((s->type != SK_MAGIC) && (revents & POLLERR))
 1974:     if (getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &se, &sse) < 0)
 1975:     {
 1976:       log(L_ERR "IO: Socket error: SO_ERROR: %m");
 1977:       se = 0;
 1978:     }
 1979: 
 1980:   s->err_hook(s, se);
 1981: }
 1982: 
 1983: void
 1984: sk_dump_all(void)
 1985: {
 1986:   node *n;
 1987:   sock *s;
 1988: 
 1989:   debug("Open sockets:\n");
 1990:   WALK_LIST(n, sock_list)
 1991:   {
 1992:     s = SKIP_BACK(sock, n, n);
 1993:     debug("%p ", s);
 1994:     sk_dump(&s->r);
 1995:   }
 1996:   debug("\n");
 1997: }
 1998: 
 1999: 
 2000: /*
 2001:  *	Internal event log and watchdog
 2002:  */
 2003: 
 2004: #define EVENT_LOG_LENGTH 32
 2005: 
 2006: struct event_log_entry
 2007: {
 2008:   void *hook;
 2009:   void *data;
 2010:   btime timestamp;
 2011:   btime duration;
 2012: };
 2013: 
 2014: static struct event_log_entry event_log[EVENT_LOG_LENGTH];
 2015: static struct event_log_entry *event_open;
 2016: static int event_log_pos, event_log_num, watchdog_active;
 2017: static btime last_time;
 2018: static btime loop_time;
 2019: 
 2020: static void
 2021: io_update_time(void)
 2022: {
 2023:   struct timespec ts;
 2024:   int rv;
 2025: 
 2026:   /*
 2027:    * This is third time-tracking procedure (after update_times() above and
 2028:    * times_update() in BFD), dedicated to internal event log and latency
 2029:    * tracking. Hopefully, we consolidate these sometimes.
 2030:    */
 2031: 
 2032:   rv = clock_gettime(CLOCK_MONOTONIC, &ts);
 2033:   if (rv < 0)
 2034:     die("clock_gettime: %m");
 2035: 
 2036:   last_time = ts.tv_sec S + ts.tv_nsec NS;
 2037: 
 2038:   if (event_open)
 2039:   {
 2040:     event_open->duration = last_time - event_open->timestamp;
 2041: 
 2042:     if (event_open->duration > config->latency_limit)
 2043:       log(L_WARN "Event 0x%p 0x%p took %d ms",
 2044: 	  event_open->hook, event_open->data, (int) (event_open->duration TO_MS));
 2045: 
 2046:     event_open = NULL;
 2047:   }
 2048: }
 2049: 
 2050: /**
 2051:  * io_log_event - mark approaching event into event log
 2052:  * @hook: event hook address
 2053:  * @data: event data address
 2054:  *
 2055:  * Store info (hook, data, timestamp) about the following internal event into
 2056:  * a circular event log (@event_log). When latency tracking is enabled, the log
 2057:  * entry is kept open (in @event_open) so the duration can be filled later.
 2058:  */
 2059: void
 2060: io_log_event(void *hook, void *data)
 2061: {
 2062:   if (config->latency_debug)
 2063:     io_update_time();
 2064: 
 2065:   struct event_log_entry *en = event_log + event_log_pos;
 2066: 
 2067:   en->hook = hook;
 2068:   en->data = data;
 2069:   en->timestamp = last_time;
 2070:   en->duration = 0;
 2071: 
 2072:   event_log_num++;
 2073:   event_log_pos++;
 2074:   event_log_pos %= EVENT_LOG_LENGTH;
 2075: 
 2076:   event_open = config->latency_debug ? en : NULL;
 2077: }
 2078: 
 2079: static inline void
 2080: io_close_event(void)
 2081: {
 2082:   if (event_open)
 2083:     io_update_time();
 2084: }
 2085: 
 2086: void
 2087: io_log_dump(void)
 2088: {
 2089:   int i;
 2090: 
 2091:   log(L_DEBUG "Event log:");
 2092:   for (i = 0; i < EVENT_LOG_LENGTH; i++)
 2093:   {
 2094:     struct event_log_entry *en = event_log + (event_log_pos + i) % EVENT_LOG_LENGTH;
 2095:     if (en->hook)
 2096:       log(L_DEBUG "  Event 0x%p 0x%p at %8d for %d ms", en->hook, en->data,
 2097: 	  (int) ((last_time - en->timestamp) TO_MS), (int) (en->duration TO_MS));
 2098:   }
 2099: }
 2100: 
 2101: void
 2102: watchdog_sigalrm(int sig UNUSED)
 2103: {
 2104:   /* Update last_time and duration, but skip latency check */
 2105:   config->latency_limit = 0xffffffff;
 2106:   io_update_time();
 2107: 
 2108:   /* We want core dump */
 2109:   abort();
 2110: }
 2111: 
 2112: static inline void
 2113: watchdog_start1(void)
 2114: {
 2115:   io_update_time();
 2116: 
 2117:   loop_time = last_time;
 2118: }
 2119: 
 2120: static inline void
 2121: watchdog_start(void)
 2122: {
 2123:   io_update_time();
 2124: 
 2125:   loop_time = last_time;
 2126:   event_log_num = 0;
 2127: 
 2128:   if (config->watchdog_timeout)
 2129:   {
 2130:     alarm(config->watchdog_timeout);
 2131:     watchdog_active = 1;
 2132:   }
 2133: }
 2134: 
 2135: static inline void
 2136: watchdog_stop(void)
 2137: {
 2138:   io_update_time();
 2139: 
 2140:   if (watchdog_active)
 2141:   {
 2142:     alarm(0);
 2143:     watchdog_active = 0;
 2144:   }
 2145: 
 2146:   btime duration = last_time - loop_time;
 2147:   if (duration > config->watchdog_warning)
 2148:     log(L_WARN "I/O loop cycle took %d ms for %d events",
 2149: 	(int) (duration TO_MS), event_log_num);
 2150: }
 2151: 
 2152: 
 2153: /*
 2154:  *	Main I/O Loop
 2155:  */
 2156: 
 2157: void
 2158: io_init(void)
 2159: {
 2160:   init_list(&sock_list);
 2161:   init_list(&global_event_list);
 2162:   krt_io_init();
 2163:   // XXX init_times();
 2164:   // XXX update_times();
 2165:   boot_time = current_time();
 2166: 
 2167:   u64 now = (u64) current_real_time();
 2168:   srandom((uint) (now ^ (now >> 32)));
 2169: }
 2170: 
 2171: static int short_loops = 0;
 2172: #define SHORT_LOOP_MAX 10
 2173: 
 2174: void
 2175: io_loop(void)
 2176: {
 2177:   int poll_tout, timeout;
 2178:   int nfds, events, pout;
 2179:   timer *t;
 2180:   sock *s;
 2181:   node *n;
 2182:   int fdmax = 256;
 2183:   struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
 2184: 
 2185:   watchdog_start1();
 2186:   for(;;)
 2187:     {
 2188:       times_update(&main_timeloop);
 2189:       events = ev_run_list(&global_event_list);
 2190:       timers_fire(&main_timeloop);
 2191:       io_close_event();
 2192: 
 2193:       // FIXME
 2194:       poll_tout = (events ? 0 : 3000); /* Time in milliseconds */
 2195:       if (t = timers_first(&main_timeloop))
 2196:       {
 2197: 	times_update(&main_timeloop);
 2198: 	timeout = (tm_remains(t) TO_MS) + 1;
 2199: 	poll_tout = MIN(poll_tout, timeout);
 2200:       }
 2201: 
 2202:       nfds = 0;
 2203:       WALK_LIST(n, sock_list)
 2204: 	{
 2205: 	  pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
 2206: 	  s = SKIP_BACK(sock, n, n);
 2207: 	  if (s->rx_hook)
 2208: 	    {
 2209: 	      pfd[nfds].fd = s->fd;
 2210: 	      pfd[nfds].events |= POLLIN;
 2211: 	    }
 2212: 	  if (s->tx_hook && s->ttx != s->tpos)
 2213: 	    {
 2214: 	      pfd[nfds].fd = s->fd;
 2215: 	      pfd[nfds].events |= POLLOUT;
 2216: 	    }
 2217: 	  if (pfd[nfds].fd != -1)
 2218: 	    {
 2219: 	      s->index = nfds;
 2220: 	      nfds++;
 2221: 	    }
 2222: 	  else
 2223: 	    s->index = -1;
 2224: 
 2225: 	  if (nfds >= fdmax)
 2226: 	    {
 2227: 	      fdmax *= 2;
 2228: 	      pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
 2229: 	    }
 2230: 	}
 2231: 
 2232:       /*
 2233:        * Yes, this is racy. But even if the signal comes before this test
 2234:        * and entering poll(), it gets caught on the next timer tick.
 2235:        */
 2236: 
 2237:       if (async_config_flag)
 2238: 	{
 2239: 	  io_log_event(async_config, NULL);
 2240: 	  async_config();
 2241: 	  async_config_flag = 0;
 2242: 	  continue;
 2243: 	}
 2244:       if (async_dump_flag)
 2245: 	{
 2246: 	  io_log_event(async_dump, NULL);
 2247: 	  async_dump();
 2248: 	  async_dump_flag = 0;
 2249: 	  continue;
 2250: 	}
 2251:       if (async_shutdown_flag)
 2252: 	{
 2253: 	  io_log_event(async_shutdown, NULL);
 2254: 	  async_shutdown();
 2255: 	  async_shutdown_flag = 0;
 2256: 	  continue;
 2257: 	}
 2258: 
 2259:       /* And finally enter poll() to find active sockets */
 2260:       watchdog_stop();
 2261:       pout = poll(pfd, nfds, poll_tout);
 2262:       watchdog_start();
 2263: 
 2264:       if (pout < 0)
 2265: 	{
 2266: 	  if (errno == EINTR || errno == EAGAIN)
 2267: 	    continue;
 2268: 	  die("poll: %m");
 2269: 	}
 2270:       if (pout)
 2271: 	{
 2272: 	  times_update(&main_timeloop);
 2273: 
 2274: 	  /* guaranteed to be non-empty */
 2275: 	  current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
 2276: 
 2277: 	  while (current_sock)
 2278: 	    {
 2279: 	      sock *s = current_sock;
 2280: 	      if (s->index == -1)
 2281: 		{
 2282: 		  current_sock = sk_next(s);
 2283: 		  goto next;
 2284: 		}
 2285: 
 2286: 	      int e;
 2287: 	      int steps;
 2288: 
 2289: 	      steps = MAX_STEPS;
 2290: 	      if (s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
 2291: 		do
 2292: 		  {
 2293: 		    steps--;
 2294: 		    io_log_event(s->rx_hook, s->data);
 2295: 		    e = sk_read(s, pfd[s->index].revents);
 2296: 		    if (s != current_sock)
 2297: 		      goto next;
 2298: 		  }
 2299: 		while (e && s->rx_hook && steps);
 2300: 
 2301: 	      steps = MAX_STEPS;
 2302: 	      if (pfd[s->index].revents & POLLOUT)
 2303: 		do
 2304: 		  {
 2305: 		    steps--;
 2306: 		    io_log_event(s->tx_hook, s->data);
 2307: 		    e = sk_write(s);
 2308: 		    if (s != current_sock)
 2309: 		      goto next;
 2310: 		  }
 2311: 		while (e && steps);
 2312: 
 2313: 	      current_sock = sk_next(s);
 2314: 	    next: ;
 2315: 	    }
 2316: 
 2317: 	  short_loops++;
 2318: 	  if (events && (short_loops < SHORT_LOOP_MAX))
 2319: 	    continue;
 2320: 	  short_loops = 0;
 2321: 
 2322: 	  int count = 0;
 2323: 	  current_sock = stored_sock;
 2324: 	  if (current_sock == NULL)
 2325: 	    current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
 2326: 
 2327: 	  while (current_sock && count < MAX_RX_STEPS)
 2328: 	    {
 2329: 	      sock *s = current_sock;
 2330: 	      if (s->index == -1)
 2331: 		{
 2332: 		  current_sock = sk_next(s);
 2333: 		  goto next2;
 2334: 		}
 2335: 
 2336: 	      if (!s->fast_rx && (pfd[s->index].revents & POLLIN) && s->rx_hook)
 2337: 		{
 2338: 		  count++;
 2339: 		  io_log_event(s->rx_hook, s->data);
 2340: 		  sk_read(s, pfd[s->index].revents);
 2341: 		  if (s != current_sock)
 2342: 		    goto next2;
 2343: 		}
 2344: 
 2345: 	      if (pfd[s->index].revents & (POLLHUP | POLLERR))
 2346: 		{
 2347: 		  sk_err(s, pfd[s->index].revents);
 2348: 		  if (s != current_sock)
 2349: 		    goto next2;
 2350: 		}
 2351: 
 2352: 	      current_sock = sk_next(s);
 2353: 	    next2: ;
 2354: 	    }
 2355: 
 2356: 
 2357: 	  stored_sock = current_sock;
 2358: 	}
 2359:     }
 2360: }
 2361: 
 2362: /*
 2363:  * test_old_bird - check whether something already listens on a UNIX socket
 2364:  * @path: filesystem path of the UNIX-domain socket to probe
 2365:  *
 2366:  * Creates a stream socket and tries to connect() it to @path. A successful
 2367:  * connect is reported through die("I found another BIRD running."). die() is
 2368:  * also called when the probe socket cannot be created or when @path does not
 2369:  * fit into sun_path together with its terminating NUL. The probe descriptor
 2370:  * is closed before returning.
 2371:  */
 2372: void
 2373: test_old_bird(char *path)
 2374: {
 2375:   struct sockaddr_un sa;
 2376:   int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 2377: 
 2378:   if (fd < 0)
 2379:     die("Cannot create socket: %m");
 2380: 
 2381:   /* Strict bound: strcpy() below also writes the terminating NUL */
 2382:   if (strlen(path) >= sizeof(sa.sun_path))
 2383:     die("Socket path too long");
 2384: 
 2385:   bzero(&sa, sizeof(sa));
 2386:   strcpy(sa.sun_path, path);
 2387:   sa.sun_family = AF_UNIX;
 2388: 
 2389:   int connected = (connect(fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) == 0);
 2390:   if (connected)
 2391:     die("I found another BIRD running.");
 2392: 
 2393:   close(fd);
 2394: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>