File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird / sysdep / linux / netlink.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Mar 17 19:50:23 2021 UTC (4 years ago) by misho
Branches: bird, MAIN
CVS tags: v1_6_8p3, HEAD
bird 1.6.8

    1: /*
    2:  *	BIRD -- Linux Netlink Interface
    3:  *
    4:  *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
    5:  *
    6:  *	Can be freely distributed and used under the terms of the GNU GPL.
    7:  */
    8: 
    9: #include <stdio.h>
   10: #include <unistd.h>
   11: #include <fcntl.h>
   12: #include <sys/socket.h>
   13: #include <sys/uio.h>
   14: #include <errno.h>
   15: 
   16: #undef LOCAL_DEBUG
   17: 
   18: #include "nest/bird.h"
   19: #include "nest/route.h"
   20: #include "nest/protocol.h"
   21: #include "nest/iface.h"
   22: #include "lib/timer.h"
   23: #include "lib/unix.h"
   24: #include "lib/krt.h"
   25: #include "lib/socket.h"
   26: #include "lib/string.h"
   27: #include "lib/hash.h"
   28: #include "conf/conf.h"
   29: 
   30: #include <asm/types.h>
   31: #include <linux/if.h>
   32: #include <linux/netlink.h>
   33: #include <linux/rtnetlink.h>
   34: 
   35: 
/*
 * Fallback definitions for constants that may be missing from older libc or
 * kernel headers. Each one is only defined when the headers did not provide it.
 */

#ifndef MSG_TRUNC			/* Hack: Several versions of glibc miss this one :( */
#define MSG_TRUNC 0x20
#endif

#ifndef IFA_FLAGS			/* Address attribute read as a u32 flag word by nl_parse_addr() */
#define IFA_FLAGS 8
#endif

#ifndef IFF_LOWER_UP			/* Interface flag mapped to IF_LINK_UP by nl_parse_link() */
#define IFF_LOWER_UP 0x10000
#endif

#ifndef RTA_TABLE			/* Route attribute used by nl_send_route() for table ids >= 256 */
#define RTA_TABLE  15
#endif


/* krt_ecmp6() is the constant 1 in IPv6 builds and 0 otherwise; its argument is ignored */
#ifdef IPV6
#define krt_ecmp6(X) 1
#else
#define krt_ecmp6(X) 0
#endif
   58: 
   59: /*
   60:  * Structure nl_parse_state keeps state of received route processing. Ideally,
   61:  * we could just independently parse received Netlink messages and immediately
   62:  * propagate received routes to the rest of BIRD, but older Linux kernel (before
   63:  * version 4.11) represents and announces IPv6 ECMP routes not as one route with
   64:  * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
   65:  * routes with the same prefix. More recent kernels work as with IPv4.
   66:  *
   67:  * Therefore, BIRD keeps currently processed route in nl_parse_state structure
   68:  * and postpones its propagation until we expect it to be final; i.e., when
   69:  * non-matching route is received or when the scan ends. When another matching
   70:  * route is received, it is merged with the already processed route to form an
   71:  * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
   72:  * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
   73:  * routes with RTA_MULTIPATH set are just considered non-matching.
   74:  *
   75:  * This is ignored for asynchronous notifications (every notification is handled
   76:  * as a separate route). It is not an issue for our routes, as we ignore such
   77:  * notifications anyways. But importing alien IPv6 ECMP routes does not work
   78:  * properly with older kernels.
   79:  *
 * Whatever the kernel version is, BIRD itself sends an IPv6 ECMP route to the
 * kernel as multiple routes for the same prefix (see nl_add_rte()).
   82:  */
   83: 
/*
 * State carried across received route messages; it holds the postponed
 * ("currently processed") route described in the comment above. The code that
 * fills and consumes these fields is not visible in this part of the file, so
 * the per-field notes below are partly assumptions.
 */
struct nl_parse_state
{
  struct linpool *pool;		/* NOTE(review): presumably scratch memory for parsed data -- confirm */
  int scan;			/* NOTE(review): presumably nonzero during a synchronous scan -- confirm */
  int merge;			/* merge == 1 enables ECMP merging (IPv6 only, per the comment above) */

  net *net;			/* NOTE(review): presumably the network of the postponed route -- confirm */
  rta *attrs;			/* NOTE(review): presumably attributes of the postponed route -- confirm */
  struct krt_proto *proto;	/* NOTE(review): presumably the kernel protocol owning the route -- confirm */
  s8 new;			/* NOTE(review): presumably add (1) vs. delete (0) -- confirm */
  s8 krt_src;			/* NOTE(review): meaning not visible here -- confirm against users */
  u8 krt_type;			/* NOTE(review): presumably the rtm_type of the route -- confirm */
  u8 krt_proto;			/* NOTE(review): presumably the rtm_protocol of the route -- confirm */
  u32 krt_metric;		/* NOTE(review): presumably the RTA_PRIORITY value -- confirm */
};
   99: 
  100: /*
  101:  *	Synchronous Netlink interface
  102:  */
  103: 
/* One rtnetlink socket together with its receive buffer and parsing cursor */
struct nl_sock
{
  int fd;				/* Socket descriptor; -1 until nl_open_sock() creates it */
  u32 seq;				/* Sequence number of the last request sent; replies must match it */
  byte *rx_buffer;			/* Receive buffer */
  struct nlmsghdr *last_hdr;		/* Recently received packet */
  uint last_size;			/* Bytes of rx_buffer not yet consumed, starting at last_hdr */
};
  112: 
/* Size in bytes of the per-socket receive buffer allocated by nl_open_sock() */
#define NL_RX_SIZE 8192

/*
 * Operation codes passed to nl_send_route(). The value is OR-ed into
 * nlmsg_flags; zero (NL_OP_DELETE) selects RTM_DELROUTE, anything else
 * selects RTM_NEWROUTE.
 */
#define NL_OP_DELETE	0
#define NL_OP_ADD	(NLM_F_CREATE|NLM_F_EXCL)
#define NL_OP_REPLACE	(NLM_F_CREATE|NLM_F_REPLACE)
#define NL_OP_APPEND	(NLM_F_CREATE|NLM_F_APPEND)

/* NOTE(review): not used in the visible part of the file -- presumably scratch memory for route parsing */
static linpool *nl_linpool;

static struct nl_sock nl_scan = {.fd = -1};	/* Netlink socket for synchronous scan */
static struct nl_sock nl_req  = {.fd = -1};	/* Netlink socket for requests */
  124: 
  125: static void
  126: nl_open_sock(struct nl_sock *nl)
  127: {
  128:   if (nl->fd < 0)
  129:     {
  130:       nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  131:       if (nl->fd < 0)
  132: 	die("Unable to open rtnetlink socket: %m");
  133:       nl->seq = now;
  134:       nl->rx_buffer = xmalloc(NL_RX_SIZE);
  135:       nl->last_hdr = NULL;
  136:       nl->last_size = 0;
  137:     }
  138: }
  139: 
  140: static void
  141: nl_open(void)
  142: {
  143:   nl_open_sock(&nl_scan);
  144:   nl_open_sock(&nl_req);
  145: }
  146: 
  147: static void
  148: nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
  149: {
  150:   struct sockaddr_nl sa;
  151: 
  152:   memset(&sa, 0, sizeof(sa));
  153:   sa.nl_family = AF_NETLINK;
  154:   nh->nlmsg_pid = 0;
  155:   nh->nlmsg_seq = ++(nl->seq);
  156:   if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
  157:     die("rtnetlink sendto: %m");
  158:   nl->last_hdr = NULL;
  159: }
  160: 
  161: static void
  162: nl_request_dump(int af, int cmd)
  163: {
  164:   struct {
  165:     struct nlmsghdr nh;
  166:     struct rtgenmsg g;
  167:   } req = {
  168:     .nh.nlmsg_type = cmd,
  169:     .nh.nlmsg_len = sizeof(req),
  170:     .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
  171:     .g.rtgen_family = af
  172:   };
  173:   nl_send(&nl_scan, &req.nh);
  174: }
  175: 
  176: static struct nlmsghdr *
  177: nl_get_reply(struct nl_sock *nl)
  178: {
  179:   for(;;)
  180:     {
  181:       if (!nl->last_hdr)
  182: 	{
  183: 	  struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
  184: 	  struct sockaddr_nl sa;
  185: 	  struct msghdr m = {
  186: 	    .msg_name = &sa,
  187: 	    .msg_namelen = sizeof(sa),
  188: 	    .msg_iov = &iov,
  189: 	    .msg_iovlen = 1,
  190: 	  };
  191: 	  int x = recvmsg(nl->fd, &m, 0);
  192: 	  if (x < 0)
  193: 	    die("nl_get_reply: %m");
  194: 	  if (sa.nl_pid)		/* It isn't from the kernel */
  195: 	    {
  196: 	      DBG("Non-kernel packet\n");
  197: 	      continue;
  198: 	    }
  199: 	  nl->last_size = x;
  200: 	  nl->last_hdr = (void *) nl->rx_buffer;
  201: 	  if (m.msg_flags & MSG_TRUNC)
  202: 	    bug("nl_get_reply: got truncated reply which should be impossible");
  203: 	}
  204:       if (NLMSG_OK(nl->last_hdr, nl->last_size))
  205: 	{
  206: 	  struct nlmsghdr *h = nl->last_hdr;
  207: 	  nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
  208: 	  if (h->nlmsg_seq != nl->seq)
  209: 	    {
  210: 	      log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
  211: 		  h->nlmsg_seq, nl->seq);
  212: 	      continue;
  213: 	    }
  214: 	  return h;
  215: 	}
  216:       if (nl->last_size)
  217: 	log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
  218:       nl->last_hdr = NULL;
  219:     }
  220: }
  221: 
  222: static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
  223: 
  224: static int
  225: nl_error(struct nlmsghdr *h, int ignore_esrch)
  226: {
  227:   struct nlmsgerr *e;
  228:   int ec;
  229: 
  230:   if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
  231:     {
  232:       log(L_WARN "Netlink: Truncated error message received");
  233:       return ENOBUFS;
  234:     }
  235:   e = (struct nlmsgerr *) NLMSG_DATA(h);
  236:   ec = -e->error;
  237:   if (ec && !(ignore_esrch && (ec == ESRCH)))
  238:     log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
  239:   return ec;
  240: }
  241: 
  242: static struct nlmsghdr *
  243: nl_get_scan(void)
  244: {
  245:   struct nlmsghdr *h = nl_get_reply(&nl_scan);
  246: 
  247:   if (h->nlmsg_type == NLMSG_DONE)
  248:     return NULL;
  249:   if (h->nlmsg_type == NLMSG_ERROR)
  250:     {
  251:       nl_error(h, 0);
  252:       return NULL;
  253:     }
  254:   return h;
  255: }
  256: 
  257: static int
  258: nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
  259: {
  260:   struct nlmsghdr *h;
  261: 
  262:   nl_send(&nl_req, pkt);
  263:   for(;;)
  264:     {
  265:       h = nl_get_reply(&nl_req);
  266:       if (h->nlmsg_type == NLMSG_ERROR)
  267: 	break;
  268:       log(L_WARN "nl_exchange: Unexpected reply received");
  269:     }
  270:   return nl_error(h, ignore_esrch) ? -1 : 0;
  271: }
  272: 
  273: /*
  274:  *	Netlink attributes
  275:  */
  276: 
  277: static int nl_attr_len;
  278: 
  279: static void *
  280: nl_checkin(struct nlmsghdr *h, int lsize)
  281: {
  282:   nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
  283:   if (nl_attr_len < 0)
  284:     {
  285:       log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
  286:       return NULL;
  287:     }
  288:   return NLMSG_DATA(h);
  289: }
  290: 
/*
 * Per-attribute-type filter entry used by nl_parse_attrs(). Tables of these
 * are indexed by the attribute type.
 */
struct nl_want_attrs {
  u8 defined:1;		/* Attribute is of interest; others are skipped */
  u8 checksize:1;	/* Payload length must be exactly `size` bytes */
  u8 size;		/* Expected payload length, used only with checksize */
};
  296: 
  297: 
/* Number of link attribute slots tracked; IFLA_WIRELESS is the highest type used below */
#define BIRD_IFLA_MAX (IFLA_WIRELESS+1)

/* Link attributes picked up by nl_parse_link(); entries are { defined, checksize, size } */
static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
  [IFLA_IFNAME]	  = { 1, 0, 0 },
  [IFLA_MTU]	  = { 1, 1, sizeof(u32) },
  [IFLA_MASTER]	  = { 1, 1, sizeof(u32) },
  [IFLA_WIRELESS] = { 1, 0, 0 },
};
  306: 
  307: 
/* Number of address attribute slots tracked; IFA_FLAGS is the highest type used below */
#define BIRD_IFA_MAX  (IFA_FLAGS+1)

/*
 * Address attributes picked up by nl_parse_addr(); entries are
 * { defined, checksize, size }. Only the table matching the build's address
 * family is compiled in.
 */
#ifndef IPV6
static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
  [IFA_ADDRESS]	  = { 1, 1, sizeof(ip4_addr) },
  [IFA_LOCAL]	  = { 1, 1, sizeof(ip4_addr) },
  [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
  [IFA_FLAGS]	  = { 1, 1, sizeof(u32) },
};
#else
static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
  [IFA_ADDRESS]	  = { 1, 1, sizeof(ip6_addr) },
  [IFA_LOCAL]	  = { 1, 1, sizeof(ip6_addr) },
  [IFA_FLAGS]	  = { 1, 1, sizeof(u32) },
};
#endif
  324: 
  325: 
/* Number of route attribute slots tracked; RTA_TABLE is the highest type used */
#define BIRD_RTA_MAX  (RTA_TABLE+1)

/*
 * Attributes accepted inside one nexthop of an RTA_MULTIPATH attribute, as
 * parsed by nl_parse_multipath(): only a fixed-size gateway address.
 */
#ifndef IPV6
static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip4_addr) },
};
#else
static struct nl_want_attrs mpnh_attr_want6[BIRD_RTA_MAX] = {
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip6_addr) },
};
#endif
  337: 
/*
 * Route message attributes of interest; entries are { defined, checksize, size }.
 * RTA_METRICS and RTA_MULTIPATH are nested, variable-length attributes, hence
 * no size check here. The IPv6 table additionally accepts RTA_IIF.
 * NOTE(review): the route parser using these tables is outside this view.
 */
#ifndef IPV6
static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
  [RTA_DST]	  = { 1, 1, sizeof(ip4_addr) },
  [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip4_addr) },
  [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  [RTA_PREFSRC]	  = { 1, 1, sizeof(ip4_addr) },
  [RTA_METRICS]	  = { 1, 0, 0 },
  [RTA_MULTIPATH] = { 1, 0, 0 },
  [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
};
#else
static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
  [RTA_DST]	  = { 1, 1, sizeof(ip6_addr) },
  [RTA_IIF]	  = { 1, 1, sizeof(u32) },
  [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  [RTA_GATEWAY]	  = { 1, 1, sizeof(ip6_addr) },
  [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  [RTA_PREFSRC]	  = { 1, 1, sizeof(ip6_addr) },
  [RTA_METRICS]	  = { 1, 0, 0 },
  [RTA_MULTIPATH] = { 1, 0, 0 },
  [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
};
#endif
  364: 
  365: 
  366: static int
  367: nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
  368: {
  369:   int max = ksize / sizeof(struct rtattr *);
  370:   bzero(k, ksize);
  371: 
  372:   for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
  373:     {
  374:       if ((a->rta_type >= max) || !want[a->rta_type].defined)
  375: 	continue;
  376: 
  377:       if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
  378: 	{
  379: 	  log(L_ERR "nl_parse_attrs: Malformed message received");
  380: 	  return 0;
  381: 	}
  382: 
  383:       k[a->rta_type] = a;
  384:     }
  385: 
  386:   if (nl_attr_len)
  387:     {
  388:       log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
  389:       return 0;
  390:     }
  391: 
  392:   return 1;
  393: }
  394: 
  395: static inline u32 rta_get_u32(struct rtattr *a)
  396: { return *(u32 *) RTA_DATA(a); }
  397: 
  398: static inline ip4_addr rta_get_ip4(struct rtattr *a)
  399: { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
  400: 
  401: static inline ip6_addr rta_get_ip6(struct rtattr *a)
  402: { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
  403: 
  404: 
  405: struct rtattr *
  406: nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
  407: {
  408:   uint pos = NLMSG_ALIGN(h->nlmsg_len);
  409:   uint len = RTA_LENGTH(dlen);
  410: 
  411:   if (pos + len > bufsize)
  412:     bug("nl_add_attr: packet buffer overflow");
  413: 
  414:   struct rtattr *a = (struct rtattr *)((char *)h + pos);
  415:   a->rta_type = code;
  416:   a->rta_len = len;
  417:   h->nlmsg_len = pos + len;
  418: 
  419:   if (dlen > 0)
  420:     memcpy(RTA_DATA(a), data, dlen);
  421: 
  422:   return a;
  423: }
  424: 
  425: static inline void
  426: nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data)
  427: {
  428:   nl_add_attr(h, bufsize, code, &data, 4);
  429: }
  430: 
  431: static inline void
  432: nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa)
  433: {
  434:   ipa_hton(ipa);
  435:   nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa));
  436: }
  437: 
  438: static inline struct rtattr *
  439: nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
  440: {
  441:   return nl_add_attr(h, bufsize, code, NULL, 0);
  442: }
  443: 
  444: static inline void
  445: nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
  446: {
  447:   a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
  448: }
  449: 
  450: static inline struct rtnexthop *
  451: nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
  452: {
  453:   uint pos = NLMSG_ALIGN(h->nlmsg_len);
  454:   uint len = RTNH_LENGTH(0);
  455: 
  456:   if (pos + len > bufsize)
  457:     bug("nl_open_nexthop: packet buffer overflow");
  458: 
  459:   h->nlmsg_len = pos + len;
  460: 
  461:   return (void *)h + pos;
  462: }
  463: 
  464: static inline void
  465: nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
  466: {
  467:   nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
  468: }
  469: 
  470: static void
  471: nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
  472: {
  473:   struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
  474: 
  475:   for (; nh; nh = nh->next)
  476:   {
  477:     struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
  478: 
  479:     rtnh->rtnh_flags = 0;
  480:     rtnh->rtnh_hops = nh->weight;
  481:     rtnh->rtnh_ifindex = nh->iface->index;
  482: 
  483:     nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
  484: 
  485:     nl_close_nexthop(h, rtnh);
  486:   }
  487: 
  488:   nl_close_attr(h, a);
  489: }
  490: 
  491: static struct mpnh *
  492: nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af)
  493: {
  494:   /* Temporary buffer for multicast nexthops */
  495:   static struct mpnh *nh_buffer;
  496:   static int nh_buf_size;	/* in number of structures */
  497:   static int nh_buf_used;
  498: 
  499:   struct rtattr *a[BIRD_RTA_MAX];
  500:   struct rtnexthop *nh = RTA_DATA(ra);
  501:   struct mpnh *rv, *first, **last;
  502:   unsigned len = RTA_PAYLOAD(ra);
  503: 
  504:   first = NULL;
  505:   last = &first;
  506:   nh_buf_used = 0;
  507: 
  508:   while (len)
  509:     {
  510:       /* Use RTNH_OK(nh,len) ?? */
  511:       if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
  512: 	return NULL;
  513: 
  514:       if (nh_buf_used == nh_buf_size)
  515:       {
  516: 	nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
  517: 	nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh));
  518:       }
  519:       *last = rv = nh_buffer + nh_buf_used++;
  520:       rv->next = NULL;
  521:       last = &(rv->next);
  522: 
  523:       rv->weight = nh->rtnh_hops;
  524:       rv->iface = if_find_by_index(nh->rtnh_ifindex);
  525:       if (!rv->iface)
  526: 	return NULL;
  527: 
  528:       /* Nonexistent RTNH_PAYLOAD ?? */
  529:       nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
  530:       switch (af)
  531:         {
  532: #ifndef IPV6
  533: 	case AF_INET:
  534: 	  if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a)))
  535: 	    return NULL;
  536: 	  break;
  537: #else
  538: 	case AF_INET6:
  539: 	  if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want6, a, sizeof(a)))
  540: 	    return NULL;
  541: 	  break;
  542: #endif
  543: 	default:
  544: 	  return NULL;
  545: 	}
  546: 
  547:       if (a[RTA_GATEWAY])
  548: 	{
  549: 	  memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(rv->gw));
  550: 	  ipa_ntoh(rv->gw);
  551: 
  552: 	  neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
  553: 				     (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
  554: 	  if (!ng || (ng->scope == SCOPE_HOST))
  555: 	    return NULL;
  556: 	}
  557:       else
  558: 	return NULL;
  559: 
  560:       len -= NLMSG_ALIGN(nh->rtnh_len);
  561:       nh = RTNH_NEXT(nh);
  562:     }
  563: 
  564:   return first;
  565: }
  566: 
  567: static void
  568: nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
  569: {
  570:   struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
  571:   int t;
  572: 
  573:   for (t = 1; t < max; t++)
  574:     if (metrics[0] & (1 << t))
  575:       nl_add_attr_u32(h, bufsize, t, metrics[t]);
  576: 
  577:   nl_close_attr(h, a);
  578: }
  579: 
  580: static int
  581: nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
  582: {
  583:   struct rtattr *a = RTA_DATA(hdr);
  584:   int len = RTA_PAYLOAD(hdr);
  585: 
  586:   metrics[0] = 0;
  587:   for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
  588:   {
  589:     if (a->rta_type == RTA_UNSPEC)
  590:       continue;
  591: 
  592:     if (a->rta_type >= max)
  593:       continue;
  594: 
  595:     if (RTA_PAYLOAD(a) != 4)
  596:       return -1;
  597: 
  598:     metrics[0] |= 1 << a->rta_type;
  599:     metrics[a->rta_type] = rta_get_u32(a);
  600:   }
  601: 
  602:   if (len > 0)
  603:     return -1;
  604: 
  605:   return 0;
  606: }
  607: 
  608: 
  609: /*
  610:  *	Scanning of interfaces
  611:  */
  612: 
  613: static void
  614: nl_parse_link(struct nlmsghdr *h, int scan)
  615: {
  616:   struct ifinfomsg *i;
  617:   struct rtattr *a[BIRD_IFLA_MAX];
  618:   int new = h->nlmsg_type == RTM_NEWLINK;
  619:   struct iface f = {};
  620:   struct iface *ifi;
  621:   char *name;
  622:   u32 mtu, master = 0;
  623:   uint fl;
  624: 
  625:   if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
  626:     return;
  627:   if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
  628:     {
  629:       /*
  630:        * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
  631:        * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
  632:        * We simply ignore all such messages with IFLA_WIRELESS without notice.
  633:        */
  634: 
  635:       if (a[IFLA_WIRELESS])
  636: 	return;
  637: 
  638:       log(L_ERR "KIF: Malformed message received");
  639:       return;
  640:     }
  641: 
  642:   name = RTA_DATA(a[IFLA_IFNAME]);
  643:   mtu = rta_get_u32(a[IFLA_MTU]);
  644: 
  645:   if (a[IFLA_MASTER])
  646:     master = rta_get_u32(a[IFLA_MASTER]);
  647: 
  648:   ifi = if_find_by_index(i->ifi_index);
  649:   if (!new)
  650:     {
  651:       DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
  652:       if (!ifi)
  653: 	return;
  654: 
  655:       if_delete(ifi);
  656:     }
  657:   else
  658:     {
  659:       DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
  660:       if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
  661: 	if_delete(ifi);
  662: 
  663:       strncpy(f.name, name, sizeof(f.name)-1);
  664:       f.index = i->ifi_index;
  665:       f.mtu = mtu;
  666: 
  667:       f.master_index = master;
  668:       f.master = if_find_by_index(master);
  669: 
  670:       fl = i->ifi_flags;
  671:       if (fl & IFF_UP)
  672: 	f.flags |= IF_ADMIN_UP;
  673:       if (fl & IFF_LOWER_UP)
  674: 	f.flags |= IF_LINK_UP;
  675:       if (fl & IFF_LOOPBACK)		/* Loopback */
  676: 	f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
  677:       else if (fl & IFF_POINTOPOINT)	/* PtP */
  678: 	f.flags |= IF_MULTICAST;
  679:       else if (fl & IFF_BROADCAST)	/* Broadcast */
  680: 	f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
  681:       else
  682: 	f.flags |= IF_MULTIACCESS;	/* NBMA */
  683: 
  684:       if (fl & IFF_MULTICAST)
  685: 	f.flags |= IF_MULTICAST;
  686: 
  687:       ifi = if_update(&f);
  688: 
  689:       if (!scan)
  690: 	if_end_partial_update(ifi);
  691:     }
  692: }
  693: 
/*
 * Process one RTM_NEWADDR / RTM_DELADDR message.
 *
 * @h:    the received message
 * @scan: nonzero when called from a full scan; otherwise the change is
 *        committed immediately with if_end_partial_update()
 *
 * Builds a struct ifa (address, prefix, broadcast, opposite, scope, flags)
 * from the message and passes it to ifa_update() or ifa_delete(). Messages
 * for a foreign address family, for unknown interfaces, with missing
 * attributes or (IPv6) for tentative addresses are dropped.
 */
static void
nl_parse_addr(struct nlmsghdr *h, int scan)
{
  struct ifaddrmsg *i;
  struct rtattr *a[BIRD_IFA_MAX];
  int new = h->nlmsg_type == RTM_NEWADDR;
  struct ifa ifa;
  struct iface *ifi;
  int scope;
  u32 ifa_flags;

  if (!(i = nl_checkin(h, sizeof(*i))))
    return;

  /* Only the address family this build was compiled for is handled */
  switch (i->ifa_family)
    {
#ifndef IPV6
      case AF_INET:
	if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
	  return;
	if (!a[IFA_LOCAL])
	  {
	    log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
	    return;
	  }
	break;
#else
      case AF_INET6:
	if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
	  return;
	break;
#endif
      default:
	return;
    }

  if (!a[IFA_ADDRESS])
    {
      log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
      return;
    }

  /* Prefer the 32-bit IFA_FLAGS attribute over the flags field of the header */
  if (a[IFA_FLAGS])
    ifa_flags = rta_get_u32(a[IFA_FLAGS]);
  else
    ifa_flags = i->ifa_flags;

  ifi = if_find_by_index(i->ifa_index);
  if (!ifi)
    {
      log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
      return;
    }

  bzero(&ifa, sizeof(ifa));
  ifa.iface = ifi;
  if (ifa_flags & IFA_F_SECONDARY)
    ifa.flags |= IA_SECONDARY;

#ifdef IPV6
  /* Ignore tentative addresses silently */
  if (ifa_flags & IFA_F_TENTATIVE)
    return;
#endif

  /* IFA_LOCAL can be unset for IPv6 interfaces */
  memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
  ipa_ntoh(ifa.ip);
  ifa.pxlen = i->ifa_prefixlen;
  if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
    {
      log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
      /* Processing continues, but the address is removed rather than added below */
      new = 0;
    }
  if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
    {
      /* Full-length prefix: IFA_ADDRESS is taken as prefix and broadcast address */
      ip_addr addr;
      memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
      ipa_ntoh(addr);
      ifa.prefix = ifa.brd = addr;

      /* It is either a host address or a peer address */
      if (ipa_equal(ifa.ip, addr))
	ifa.flags |= IA_HOST;
      else
	{
	  ifa.flags |= IA_PEER;
	  ifa.opposite = addr;
	}
    }
  else
    {
      /* Ordinary prefix: derive network and broadcast from the address and mask */
      ip_addr netmask = ipa_mkmask(ifa.pxlen);
      ifa.prefix = ipa_and(ifa.ip, netmask);
      ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
	ifa.opposite = ipa_opposite_m1(ifa.ip);

#ifndef IPV6
      if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
	ifa.opposite = ipa_opposite_m2(ifa.ip);

      /* Accept the kernel's broadcast address only if it is one of the two computed candidates */
      if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
	{
	  ip_addr xbrd;
	  memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
	  ipa_ntoh(xbrd);
	  if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
	    ifa.brd = xbrd;
	  else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
	    log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
	}
#endif
    }

  scope = ipa_classify(ifa.ip);
  if (scope < 0)
    {
      log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
      return;
    }
  ifa.scope = scope & IADDR_SCOPE_MASK;

  DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
      ifi->index, ifi->name,
      new ? "added" : "removed",
      ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);

  if (new)
    ifa_update(&ifa);
  else
    ifa_delete(&ifa);

  if (!scan)
    if_end_partial_update(ifi);
}
  830: 
  831: void
  832: kif_do_scan(struct kif_proto *p UNUSED)
  833: {
  834:   struct nlmsghdr *h;
  835: 
  836:   if_start_update();
  837: 
  838:   nl_request_dump(AF_UNSPEC, RTM_GETLINK);
  839:   while (h = nl_get_scan())
  840:     if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
  841:       nl_parse_link(h, 1);
  842:     else
  843:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
  844: 
  845:   /* Re-resolve master interface for slaves */
  846:   struct iface *i;
  847:   WALK_LIST(i, iface_list)
  848:     if (i->master_index)
  849:     {
  850:       struct iface f = {
  851: 	.flags = i->flags,
  852: 	.mtu = i->mtu,
  853: 	.index = i->index,
  854: 	.master_index = i->master_index,
  855: 	.master = if_find_by_index(i->master_index)
  856:       };
  857: 
  858:       if (f.master != i->master)
  859:       {
  860: 	memcpy(f.name, i->name, sizeof(f.name));
  861: 	if_update(&f);
  862:       }
  863:     }
  864: 
  865:   nl_request_dump(BIRD_AF, RTM_GETADDR);
  866:   while (h = nl_get_scan())
  867:     if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
  868:       nl_parse_addr(h, 1);
  869:     else
  870:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
  871: 
  872:   if_end_update();
  873: }
  874: 
  875: /*
  876:  *	Routes
  877:  */
  878: 
  879: static inline u32
  880: krt_table_id(struct krt_proto *p)
  881: {
  882:   return KRT_CF->sys.table_id;
  883: }
  884: 
/*
 * Hash table of kernel protocol instances keyed by their kernel table id.
 * The RTH_* macros are the parameter set for the generic hash macros; the
 * prefix RTH is what HASH_DEFINE_REHASH_FN() below is instantiated with.
 */
static HASH(struct krt_proto) nl_table_map;

#define RTH_FN(k)	u32_hash(k)		/* Hash function applied to a key */
#define RTH_EQ(k1,k2)	k1 == k2		/* Key comparison */
#define RTH_KEY(p)	krt_table_id(p)		/* Key of an entry: its table id */
#define RTH_NEXT(p)	p->sys.hash_next	/* Chaining link of an entry */

#define RTH_REHASH		rth_rehash
/* NOTE(review): presumably resize thresholds/steps for the generic hash code -- confirm in lib/hash.h */
#define RTH_PARAMS		/8, *2, 2, 2, 6, 20

HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
  896: 
  897: int
  898: krt_capable(rte *e)
  899: {
  900:   rta *a = e->attrs;
  901: 
  902:   if (a->cast != RTC_UNICAST)
  903:     return 0;
  904: 
  905:   switch (a->dest)
  906:     {
  907:     case RTD_ROUTER:
  908:     case RTD_DEVICE:
  909:       if (a->iface == NULL)
  910: 	return 0;
  911:     case RTD_BLACKHOLE:
  912:     case RTD_UNREACHABLE:
  913:     case RTD_PROHIBIT:
  914:     case RTD_MULTIPATH:
  915:       break;
  916:     default:
  917:       return 0;
  918:     }
  919:   return 1;
  920: }
  921: 
  922: static inline int
  923: nh_bufsize(struct mpnh *nh)
  924: {
  925:   int rv = 0;
  926:   for (; nh != NULL; nh = nh->next)
  927:     rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
  928:   return rv;
  929: }
  930: 
/*
 * Build one RTM_NEWROUTE / RTM_DELROUTE request for route @e and send it to
 * the kernel, waiting for the acknowledgement.
 *
 * @p:      kernel protocol instance (supplies table id and configured metric)
 * @e:      the route; its network gives the destination prefix
 * @eattrs: extended attributes consulted for metric, scope, prefsrc, realm
 *          and the per-route kernel metrics
 * @op:     one of the NL_OP_* codes; NL_OP_DELETE selects RTM_DELROUTE
 * @dest:   destination type to encode (RTD_*); RTD_NONE adds no nexthop data
 * @gw:     gateway, used for RTD_ROUTER
 * @iface:  outgoing interface, used for RTD_ROUTER and RTD_DEVICE
 *
 * Returns the result of nl_exchange(): 0 on success, -1 on a kernel error.
 */
static int
nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface)
{
  eattr *ea;
  net *net = e->net;
  rta *a = e->attrs;
  u32 priority = 0;

  struct {
    struct nlmsghdr h;
    struct rtmsg r;
    char buf[0];
  } *r;

  /* Headers + fixed slack for the simple attributes + room for metrics and nexthops */
  uint rsize = sizeof(*r) + 128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops);
  r = alloca(rsize);

  DBG("nl_send_route(%I/%d,op=%x)\n", net->n.prefix, net->n.pxlen, op);

  bzero(&r->h, sizeof(r->h));
  bzero(&r->r, sizeof(r->r));
  r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
  r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
  r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;

  r->r.rtm_family = BIRD_AF;
  r->r.rtm_dst_len = net->n.pxlen;
  r->r.rtm_protocol = RTPROT_BIRD;
  /* Scope stays NOWHERE for deletes; it is overwritten below for other operations */
  r->r.rtm_scope = RT_SCOPE_NOWHERE;
  nl_add_attr_ipa(&r->h, rsize, RTA_DST, net->n.prefix);

  /*
   * Strange behavior for RTM_DELROUTE:
   * 1) rtm_family is ignored in IPv6, works for IPv4
   * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
   * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
   */

  /* Table ids below 256 go into the header field, larger ones into RTA_TABLE */
  if (krt_table_id(p) < 256)
    r->r.rtm_table = krt_table_id(p);
  else
    nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));

  /* Metric precedence: route's own (RTS_DUMMY source), configured, then EA_KRT_METRIC (not for deletes) */
  if (a->source == RTS_DUMMY)
    priority = e->u.krt.metric;
  else if (KRT_CF->sys.metric)
    priority = KRT_CF->sys.metric;
  else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
    priority = ea->u.data;

  if (priority)
    nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);

  /* For route delete, we do not specify remaining route attributes */
  if (op == NL_OP_DELETE)
    goto dest;

  /* Default scope is LINK for device routes, UNIVERSE otherwise */
  if (ea = ea_find(eattrs, EA_KRT_SCOPE))
    r->r.rtm_scope = ea->u.data;
  else
    r->r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;

  if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
    nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);

  if (ea = ea_find(eattrs, EA_KRT_REALM))
    nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);


  /* metrics[0] is a bitmap of the ids present; only those slots get initialized */
  u32 metrics[KRT_METRICS_MAX];
  metrics[0] = 0;

  struct ea_walk_state ews = { .eattrs = eattrs };
  while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
  {
    int id = ea->id - EA_KRT_METRICS;
    metrics[0] |= 1 << id;
    metrics[id] = ea->u.data;
  }

  if (metrics[0])
    nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);


  /* The label shares its name with the `dest` parameter; labels live in a separate namespace */
dest:
  /* a->iface != NULL checked in krt_capable() for router and device routes */
  switch (dest)
    {
    case RTD_ROUTER:
      r->r.rtm_type = RTN_UNICAST;
      nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index);
      nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, gw);
      break;
    case RTD_DEVICE:
      r->r.rtm_type = RTN_UNICAST;
      nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index);
      break;
    case RTD_BLACKHOLE:
      r->r.rtm_type = RTN_BLACKHOLE;
      break;
    case RTD_UNREACHABLE:
      r->r.rtm_type = RTN_UNREACHABLE;
      break;
    case RTD_PROHIBIT:
      r->r.rtm_type = RTN_PROHIBIT;
      break;
    case RTD_MULTIPATH:
      r->r.rtm_type = RTN_UNICAST;
      nl_add_multipath(&r->h, rsize, a->nexthops);
      break;
    case RTD_NONE:
      /* Used by nl_delete_rte(): no type and no nexthop information */
      break;
    default:
      bug("krt_capable inconsistent with nl_send_route");
    }

  /* Ignore missing for DELETE */
  return nl_exchange(&r->h, (op == NL_OP_DELETE));
}
 1051: 
 1052: static inline int
 1053: nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
 1054: {
 1055:   rta *a = e->attrs;
 1056:   int err = 0;
 1057: 
 1058:   if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH))
 1059:   {
 1060:     struct mpnh *nh = a->nexthops;
 1061: 
 1062:     err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface);
 1063:     if (err < 0)
 1064:       return err;
 1065: 
 1066:     for (nh = nh->next; nh; nh = nh->next)
 1067:       err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface);
 1068: 
 1069:     return err;
 1070:   }
 1071: 
 1072:   return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface);
 1073: }
 1074: 
 1075: static inline int
 1076: nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
 1077: {
 1078:   int err = 0;
 1079: 
 1080:   /* For IPv6, we just repeatedly request DELETE until we get error */
 1081:   do
 1082:     err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL);
 1083:   while (krt_ecmp6(p) && !err);
 1084: 
 1085:   return err;
 1086: }
 1087: 
 1088: void
 1089: krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
 1090: {
 1091:   int err = 0;
 1092: 
 1093:   /*
 1094:    * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
 1095:    *
 1096:    * 1) Does not check for matching rtm_protocol
 1097:    * 2) Has broken semantics for IPv6 ECMP
 1098:    * 3) Crashes some kernel version when used for IPv6 ECMP
 1099:    *
 1100:    * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
 1101:    * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
 1102:    */
 1103: 
 1104:   if (old)
 1105:     nl_delete_rte(p, old, eattrs);
 1106: 
 1107:   if (new)
 1108:     err = nl_add_rte(p, new, eattrs);
 1109: 
 1110:   if (err < 0)
 1111:     n->n.flags |= KRF_SYNC_ERROR;
 1112:   else
 1113:     n->n.flags &= ~KRF_SYNC_ERROR;
 1114: }
 1115: 
 1116: 
 1117: static inline struct mpnh *
 1118: nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
 1119: {
 1120:   struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh));
 1121: 
 1122:   nh->gw = gw;
 1123:   nh->iface = iface;
 1124:   nh->next = NULL;
 1125:   nh->weight = weight;
 1126: 
 1127:   return nh;
 1128: }
 1129: 
 1130: static int
 1131: nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
 1132: {
 1133:   /* Route merging must be active */
 1134:   if (!s->merge)
 1135:     return 0;
 1136: 
 1137:   /* Saved and new route must have same network, proto/table, and priority */
 1138:   if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
 1139:     return 0;
 1140: 
 1141:   /* Both must be regular unicast routes */
 1142:   if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
 1143:     return 0;
 1144: 
 1145:   return 1;
 1146: }
 1147: 
 1148: static void
 1149: nl_announce_route(struct nl_parse_state *s)
 1150: {
 1151:   rte *e = rte_get_temp(s->attrs);
 1152:   e->net = s->net;
 1153:   e->u.krt.src = s->krt_src;
 1154:   e->u.krt.proto = s->krt_proto;
 1155:   e->u.krt.seen = 0;
 1156:   e->u.krt.best = 0;
 1157:   e->u.krt.metric = s->krt_metric;
 1158: 
 1159:   if (s->scan)
 1160:     krt_got_route(s->proto, e);
 1161:   else
 1162:     krt_got_route_async(s->proto, e, s->new);
 1163: 
 1164:   s->net = NULL;
 1165:   s->attrs = NULL;
 1166:   s->proto = NULL;
 1167:   lp_flush(s->pool);
 1168: }
 1169: 
 1170: static inline void
 1171: nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
 1172: {
 1173:   memset(s, 0, sizeof (struct nl_parse_state));
 1174:   s->pool = nl_linpool;
 1175:   s->scan = scan;
 1176:   s->merge = merge;
 1177: }
 1178: 
 1179: static inline void
 1180: nl_parse_end(struct nl_parse_state *s)
 1181: {
 1182:   if (s->net)
 1183:     nl_announce_route(s);
 1184: }
 1185: 
 1186: 
/*
 * SKIP() emits a debug message explaining why a route is being ignored and
 * returns from the enclosing (void) function.
 */
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)

/*
 * nl_parse_route - process one RTM_NEWROUTE / RTM_DELROUTE message
 * @s: parse state prepared by nl_parse_begin()
 * @h: header of the received netlink message
 *
 * The message is checked and translated into an rta allocated from s->pool.
 * If a route is already stored in @s and the new one cannot be merged with it
 * (see nl_mergable_route()), the stored route is announced first. The new
 * route is then either stored in @s (when nothing is stored) or merged into
 * the stored route as an additional next hop.
 *
 * Messages that fail any of the checks are dropped, either quietly (SKIP or a
 * plain return) or with an error log entry.
 */
static void
nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
{
  struct krt_proto *p;
  struct rtmsg *i;
  struct rtattr *a[BIRD_RTA_MAX];
  int new = h->nlmsg_type == RTM_NEWROUTE;	/* Non-zero for RTM_NEWROUTE, zero otherwise */

  ip_addr dst = IPA_NONE;
  u32 oif = ~0;
  u32 table;
  u32 priority = 0;
  u32 def_scope = RT_SCOPE_UNIVERSE;
  int src;

  if (!(i = nl_checkin(h, sizeof(*i))))
    return;

  /* Only the address family this binary was built for is accepted */
  switch (i->rtm_family)
    {
#ifndef IPV6
      case AF_INET:
	if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
	  return;
	break;
#else
      case AF_INET6:
	if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
	  return;
	break;
#endif
      default:
	return;
    }

  /* Without RTA_DST the destination stays IPA_NONE */
  if (a[RTA_DST])
    {
      memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
      ipa_ntoh(dst);
    }

  if (a[RTA_OIF])
    oif = rta_get_u32(a[RTA_OIF]);

  /* RTA_TABLE, when present, takes precedence over the rtm_table header field */
  if (a[RTA_TABLE])
    table = rta_get_u32(a[RTA_TABLE]);
  else
    table = i->rtm_table;

  p = HASH_FIND(nl_table_map, RTH, table); /* Do we know this table? */
  DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, table, i->rtm_protocol, p ? p->p.name : "(none)");
  if (!p)
    SKIP("unknown table %d\n", table);

#ifdef IPV6
  if (a[RTA_IIF])
    SKIP("IIF set\n");
#else
  if (i->rtm_tos != 0)			/* We don't support TOS */
    SKIP("TOS %02x\n", i->rtm_tos);
#endif

  if (s->scan && !new)
    SKIP("RTM_DELROUTE in scan\n");

  if (a[RTA_PRIORITY])
    priority = rta_get_u32(a[RTA_PRIORITY]);

  int c = ipa_classify_net(dst);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
    SKIP("strange class/scope\n");

  /* Map the kernel's route origin to the krt source code */
  switch (i->rtm_protocol)
    {
    case RTPROT_UNSPEC:
      SKIP("proto unspec\n");

    case RTPROT_REDIRECT:
      src = KRT_SRC_REDIRECT;
      break;

    case RTPROT_KERNEL:
      src = KRT_SRC_KERNEL;
      return;			/* Routes with RTPROT_KERNEL are not processed any further */

    case RTPROT_BIRD:
      /* Outside of a scan, a route with our own protocol id is treated as an echo */
      if (!s->scan)
	SKIP("echo\n");
      src = KRT_SRC_BIRD;
      break;

    case RTPROT_BOOT:
    default:
      src = KRT_SRC_ALIEN;
    }

  net *net = net_get(p->p.table, dst, i->rtm_dst_len);

  /* A stored route that cannot absorb this one has to be announced first */
  if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
    nl_announce_route(s);

  rta *ra = lp_allocz(s->pool, sizeof(rta));
  ra->src = p->p.main_source;
  ra->source = RTS_INHERIT;
  ra->scope = SCOPE_UNIVERSE;
  ra->cast = RTC_UNICAST;

  /* Translate the kernel route type into a BIRD destination type */
  switch (i->rtm_type)
    {
    case RTN_UNICAST:

      if (a[RTA_MULTIPATH])
	{
	  ra->dest = RTD_MULTIPATH;
	  ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH], i->rtm_family);
	  if (!ra->nexthops)
	    {
	      log(L_ERR "KRT: Received strange multipath route %I/%d",
		  net->n.prefix, net->n.pxlen);
	      return;
	    }

	  break;
	}

      ra->iface = if_find_by_index(oif);
      if (!ra->iface)
	{
	  log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
	      net->n.prefix, net->n.pxlen, oif);
	  return;
	}

      if (a[RTA_GATEWAY])
	{
	  neighbor *ng;
	  ra->dest = RTD_ROUTER;
	  memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));
	  ipa_ntoh(ra->gw);

#ifdef IPV6
	  /* Silently skip strange 6to4 routes */
	  if (ipa_in_net(ra->gw, IPA_NONE, 96))
	    return;
#endif

	  ng = neigh_find2(&p->p, &ra->gw, ra->iface,
			   (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
	  if (!ng || (ng->scope == SCOPE_HOST))
	    {
	      log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
		  net->n.prefix, net->n.pxlen, ra->gw);
	      return;
	    }
	}
      else
	{
	  /* No gateway: a device route, whose default scope is LINK */
	  ra->dest = RTD_DEVICE;
	  def_scope = RT_SCOPE_LINK;
	}

      break;
    case RTN_BLACKHOLE:
      ra->dest = RTD_BLACKHOLE;
      break;
    case RTN_UNREACHABLE:
      ra->dest = RTD_UNREACHABLE;
      break;
    case RTN_PROHIBIT:
      ra->dest = RTD_PROHIBIT;
      break;
    /* FIXME: What about RTN_THROW? */
    default:
      SKIP("type %d\n", i->rtm_type);
      return;
    }

  /*
   * Optional kernel attributes follow. Each one present is stored in its own
   * ea_list which is prepended to ra->eattrs.
   */

  /* The scope is recorded only when it differs from the default one */
  if (i->rtm_scope != def_scope)
    {
      ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
      ea->next = ra->eattrs;
      ra->eattrs = ea;
      ea->flags = EALF_SORTED;
      ea->count = 1;
      ea->attrs[0].id = EA_KRT_SCOPE;
      ea->attrs[0].flags = 0;
      ea->attrs[0].type = EAF_TYPE_INT;
      ea->attrs[0].u.data = i->rtm_scope;
    }

  if (a[RTA_PREFSRC])
    {
      ip_addr ps;
      memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
      ipa_ntoh(ps);

      ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
      ea->next = ra->eattrs;
      ra->eattrs = ea;
      ea->flags = EALF_SORTED;
      ea->count = 1;
      ea->attrs[0].id = EA_KRT_PREFSRC;
      ea->attrs[0].flags = 0;
      ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
      ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
      ea->attrs[0].u.ptr->length = sizeof(ps);
      memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
    }

  if (a[RTA_FLOW])
    {
      ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
      ea->next = ra->eattrs;
      ra->eattrs = ea;
      ea->flags = EALF_SORTED;
      ea->count = 1;
      ea->attrs[0].id = EA_KRT_REALM;
      ea->attrs[0].flags = 0;
      ea->attrs[0].type = EAF_TYPE_INT;
      ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
    }

  if (a[RTA_METRICS])
    {
      u32 metrics[KRT_METRICS_MAX];
      ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
      int t, n = 0;

      if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
        {
	  log(L_ERR "KRT: Received route %I/%d with strange RTA_METRICS attribute",
	      net->n.prefix, net->n.pxlen);
	  return;
	}

      /* metrics[0] is used as a bitmap telling which of metrics[1..] to convert */
      for (t = 1; t < KRT_METRICS_MAX; t++)
	if (metrics[0] & (1 << t))
	  {
	    ea->attrs[n].id = EA_CODE(EAP_KRT, KRT_METRICS_OFFSET + t);
	    ea->attrs[n].flags = 0;
	    ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
	    ea->attrs[n].u.data = metrics[t];
	    n++;
	  }

      /* The list is linked in only when at least one metric was converted */
      if (n > 0)
        {
	  ea->next = ra->eattrs;
	  ea->flags = EALF_SORTED;
	  ea->count = n;
	  ra->eattrs = ea;
	}
    }

  /*
   * Ideally, now we would send the received route to the rest of kernel code.
   * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
   * postpone it and merge next hops until the end of the sequence. Note that
   * proper multipath updates are rejected by nl_mergable_route(), so it is
   * always the first case for them.
   */

  if (!s->net)
  {
    /* Store the new route */
    s->net = net;
    s->attrs = ra;
    s->proto = p;
    s->new = new;
    s->krt_src = src;
    s->krt_type = i->rtm_type;
    s->krt_proto = i->rtm_protocol;
    s->krt_metric = priority;
  }
  else
  {
    /* Merge next hops with the stored route */
    rta *a = s->attrs;	/* Note: shadows the attribute array 'a' within this block */

    /* First merge: convert the stored single-next-hop route to a multipath one */
    if (a->dest != RTD_MULTIPATH)
    {
      a->dest = RTD_MULTIPATH;
      a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0);
    }

    mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0));
  }
}
 1477: 
 1478: void
 1479: krt_do_scan(struct krt_proto *p UNUSED)	/* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
 1480: {
 1481:   struct nlmsghdr *h;
 1482:   struct nl_parse_state s;
 1483: 
 1484:   nl_parse_begin(&s, 1, krt_ecmp6(p));
 1485: 
 1486:   nl_request_dump(BIRD_AF, RTM_GETROUTE);
 1487:   while (h = nl_get_scan())
 1488:     if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
 1489:       nl_parse_route(&s, h);
 1490:     else
 1491:       log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
 1492: 
 1493:   nl_parse_end(&s);
 1494: }
 1495: 
 1496: /*
 1497:  *	Asynchronous Netlink interface
 1498:  */
 1499: 
static sock *nl_async_sk;		/* BIRD socket for asynchronous notifications; set by nl_open_async() */
static byte *nl_async_rx_buffer;	/* Receive buffer of NL_RX_SIZE bytes, allocated by nl_open_async() */
 1502: 
 1503: static void
 1504: nl_async_msg(struct nlmsghdr *h)
 1505: {
 1506:   struct nl_parse_state s;
 1507: 
 1508:   switch (h->nlmsg_type)
 1509:     {
 1510:     case RTM_NEWROUTE:
 1511:     case RTM_DELROUTE:
 1512:       DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
 1513:       nl_parse_begin(&s, 0, 0);
 1514:       nl_parse_route(&s, h);
 1515:       nl_parse_end(&s);
 1516:       break;
 1517:     case RTM_NEWLINK:
 1518:     case RTM_DELLINK:
 1519:       DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
 1520:       if (kif_proto)
 1521: 	nl_parse_link(h, 0);
 1522:       break;
 1523:     case RTM_NEWADDR:
 1524:     case RTM_DELADDR:
 1525:       DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
 1526:       if (kif_proto)
 1527: 	nl_parse_addr(h, 0);
 1528:       break;
 1529:     default:
 1530:       DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
 1531:     }
 1532: }
 1533: 
 1534: static int
 1535: nl_async_hook(sock *sk, uint size UNUSED)
 1536: {
 1537:   struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
 1538:   struct sockaddr_nl sa;
 1539:   struct msghdr m = {
 1540:     .msg_name = &sa,
 1541:     .msg_namelen = sizeof(sa),
 1542:     .msg_iov = &iov,
 1543:     .msg_iovlen = 1,
 1544:   };
 1545:   struct nlmsghdr *h;
 1546:   int x;
 1547:   uint len;
 1548: 
 1549:   x = recvmsg(sk->fd, &m, 0);
 1550:   if (x < 0)
 1551:     {
 1552:       if (errno == ENOBUFS)
 1553: 	{
 1554: 	  /*
 1555: 	   *  Netlink reports some packets have been thrown away.
 1556: 	   *  One day we might react to it by asking for route table
 1557: 	   *  scan in near future.
 1558: 	   */
 1559: 	  log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
 1560: 	  return 1;	/* More data are likely to be ready */
 1561: 	}
 1562:       else if (errno != EWOULDBLOCK)
 1563: 	log(L_ERR "Netlink recvmsg: %m");
 1564:       return 0;
 1565:     }
 1566:   if (sa.nl_pid)		/* It isn't from the kernel */
 1567:     {
 1568:       DBG("Non-kernel packet\n");
 1569:       return 1;
 1570:     }
 1571:   h = (void *) nl_async_rx_buffer;
 1572:   len = x;
 1573:   if (m.msg_flags & MSG_TRUNC)
 1574:     {
 1575:       log(L_WARN "Netlink got truncated asynchronous message");
 1576:       return 1;
 1577:     }
 1578:   while (NLMSG_OK(h, len))
 1579:     {
 1580:       nl_async_msg(h);
 1581:       h = NLMSG_NEXT(h, len);
 1582:     }
 1583:   if (len)
 1584:     log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
 1585:   return 1;
 1586: }
 1587: 
/*
 * Error hook of the asynchronous netlink socket. It just runs the receive
 * hook; the error code and the hook's return value are both ignored.
 */
static void
nl_async_err_hook(sock *sk, int e UNUSED)
{
  nl_async_hook(sk, 0);
}
 1593: 
 1594: static void
 1595: nl_open_async(void)
 1596: {
 1597:   sock *sk;
 1598:   struct sockaddr_nl sa;
 1599:   int fd;
 1600: 
 1601:   if (nl_async_sk)
 1602:     return;
 1603: 
 1604:   DBG("KRT: Opening async netlink socket\n");
 1605: 
 1606:   fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 1607:   if (fd < 0)
 1608:     {
 1609:       log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
 1610:       return;
 1611:     }
 1612: 
 1613:   bzero(&sa, sizeof(sa));
 1614:   sa.nl_family = AF_NETLINK;
 1615: #ifdef IPV6
 1616:   sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
 1617: #else
 1618:   sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
 1619: #endif
 1620:   if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
 1621:     {
 1622:       log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
 1623:       close(fd);
 1624:       return;
 1625:     }
 1626: 
 1627:   nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
 1628: 
 1629:   sk = nl_async_sk = sk_new(krt_pool);
 1630:   sk->type = SK_MAGIC;
 1631:   sk->rx_hook = nl_async_hook;
 1632:   sk->err_hook = nl_async_err_hook;
 1633:   sk->fd = fd;
 1634:   if (sk_open(sk) < 0)
 1635:     bug("Netlink: sk_open failed");
 1636: }
 1637: 
 1638: 
 1639: /*
 1640:  *	Interface to the UNIX krt module
 1641:  */
 1642: 
/*
 * One-time initialization of the netlink backend: creates the linear pool
 * used as the parse-state pool (see nl_parse_begin()) and initializes the hash
 * that maps kernel table ids to krt protocol instances. Both are allocated
 * from krt_pool.
 */
void
krt_sys_io_init(void)
{
  nl_linpool = lp_new(krt_pool, 4080);
  HASH_INIT(nl_table_map, krt_pool, 6);
}
 1649: 
 1650: int
 1651: krt_sys_start(struct krt_proto *p)
 1652: {
 1653:   struct krt_proto *old = HASH_FIND(nl_table_map, RTH, krt_table_id(p));
 1654: 
 1655:   if (old)
 1656:     {
 1657:       log(L_ERR "%s: Kernel table %u already registered by %s",
 1658: 	  p->p.name, krt_table_id(p), old->p.name);
 1659:       return 0;
 1660:     }
 1661: 
 1662:   HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
 1663: 
 1664:   nl_open();
 1665:   nl_open_async();
 1666: 
 1667:   return 1;
 1668: }
 1669: 
/*
 * Shut down the system-dependent part of krt protocol @p: the protocol is
 * removed from the kernel table map. The netlink sockets are left untouched.
 */
void
krt_sys_shutdown(struct krt_proto *p)
{
  HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
}
 1675: 
 1676: int
 1677: krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
 1678: {
 1679:   return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
 1680: }
 1681: 
 1682: void
 1683: krt_sys_init_config(struct krt_config *cf)
 1684: {
 1685:   cf->sys.table_id = RT_TABLE_MAIN;
 1686:   cf->sys.metric = 0;
 1687: }
 1688: 
 1689: void
 1690: krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
 1691: {
 1692:   d->sys.table_id = s->sys.table_id;
 1693:   d->sys.metric = s->sys.metric;
 1694: }
 1695: 
/*
 * Names of kernel route metrics, indexed by metric id (attribute id minus
 * KRT_METRICS_OFFSET, see krt_sys_get_attr()). Index 0 has no name. The table
 * also provides the bit names used when formatting the "lock" bitfield.
 */
static const char *krt_metrics_names[KRT_METRICS_MAX] = {
  NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
  "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
};

/*
 * Bit names used when formatting the "features" bitfield, indexed by bit
 * position. NULL entries are presumably bits without a name -- TODO confirm
 * against ea_format_bitfield().
 */
static const char *krt_features_names[KRT_FEATURES_MAX] = {
  "ecn", NULL, NULL, "allfrag"
};
 1704: 
 1705: int
 1706: krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
 1707: {
 1708:   switch (a->id)
 1709:   {
 1710:   case EA_KRT_PREFSRC:
 1711:     bsprintf(buf, "prefsrc");
 1712:     return GA_NAME;
 1713: 
 1714:   case EA_KRT_REALM:
 1715:     bsprintf(buf, "realm");
 1716:     return GA_NAME;
 1717: 
 1718:   case EA_KRT_SCOPE:
 1719:     bsprintf(buf, "scope");
 1720:     return GA_NAME;
 1721: 
 1722:   case EA_KRT_LOCK:
 1723:     buf += bsprintf(buf, "lock:");
 1724:     ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
 1725:     return GA_FULL;
 1726: 
 1727:   case EA_KRT_FEATURES:
 1728:     buf += bsprintf(buf, "features:");
 1729:     ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
 1730:     return GA_FULL;
 1731: 
 1732:   default:;
 1733:     int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
 1734:     if (id > 0 && id < KRT_METRICS_MAX)
 1735:     {
 1736:       bsprintf(buf, "%s", krt_metrics_names[id]);
 1737:       return GA_NAME;
 1738:     }
 1739: 
 1740:     return GA_UNKNOWN;
 1741:   }
 1742: }
 1743: 
 1744: 
 1745: 
/*
 * Start the system-dependent part of the kif protocol: calls nl_open() and
 * nl_open_async(). The latter returns immediately when the asynchronous
 * socket already exists.
 */
void
kif_sys_start(struct kif_proto *p UNUSED)
{
  nl_open();
  nl_open_async();
}
 1752: 
/*
 * Shut down the system-dependent part of the kif protocol. Nothing is done
 * here; the sockets opened by kif_sys_start() are not closed.
 */
void
kif_sys_shutdown(struct kif_proto *p UNUSED)
{
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>