File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / bird2 / sysdep / linux / netlink.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Oct 21 16:03:56 2019 UTC (5 years, 5 months ago) by misho
Branches: bird2, MAIN
CVS tags: v2_0_7p0, HEAD
bird2 ver 2.0.7

    1: /*
    2:  *	BIRD -- Linux Netlink Interface
    3:  *
    4:  *	(c) 1999--2000 Martin Mares <mj@ucw.cz>
    5:  *
    6:  *	Can be freely distributed and used under the terms of the GNU GPL.
    7:  */
    8: 
    9: #include <alloca.h>
   10: #include <stdio.h>
   11: #include <unistd.h>
   12: #include <fcntl.h>
   13: #include <sys/socket.h>
   14: #include <sys/uio.h>
   15: #include <errno.h>
   16: 
   17: #undef LOCAL_DEBUG
   18: 
   19: #include "nest/bird.h"
   20: #include "nest/route.h"
   21: #include "nest/protocol.h"
   22: #include "nest/iface.h"
   23: #include "lib/alloca.h"
   24: #include "sysdep/unix/unix.h"
   25: #include "sysdep/unix/krt.h"
   26: #include "lib/socket.h"
   27: #include "lib/string.h"
   28: #include "lib/hash.h"
   29: #include "conf/conf.h"
   30: 
   31: #include <asm/types.h>
   32: #include <linux/if.h>
   33: #include <linux/netlink.h>
   34: #include <linux/rtnetlink.h>
   35: 
   36: #ifdef HAVE_MPLS_KERNEL
   37: #include <linux/lwtunnel.h>
   38: #endif
   39: 
   40: #ifndef MSG_TRUNC			/* Hack: Several versions of glibc miss this one :( */
   41: #define MSG_TRUNC 0x20
   42: #endif
   43: 
   44: #ifndef IFA_FLAGS
   45: #define IFA_FLAGS 8
   46: #endif
   47: 
   48: #ifndef IFF_LOWER_UP
   49: #define IFF_LOWER_UP 0x10000
   50: #endif
   51: 
   52: #ifndef RTA_TABLE
   53: #define RTA_TABLE  15
   54: #endif
   55: 
   56: #ifndef RTA_VIA
   57: #define RTA_VIA	 18
   58: #endif
   59: 
   60: #ifndef RTA_NEWDST
   61: #define RTA_NEWDST  19
   62: #endif
   63: 
   64: #ifndef RTA_ENCAP_TYPE
   65: #define RTA_ENCAP_TYPE	21
   66: #endif
   67: 
   68: #ifndef RTA_ENCAP
   69: #define RTA_ENCAP  22
   70: #endif
   71: 
   /* Nonzero when the address family stored in (p)->af is AF_INET. */
   #define krt_ipv4(p) ((p)->af == AF_INET)

   /* Nonzero when the address family stored in (p)->af is AF_INET6. */
   #define krt_ecmp6(p) ((p)->af == AF_INET6)

   /* NOTE(review): presumably the default ECMP nexthop limit -- users are outside this chunk. */
   const int rt_default_ecmp = 16;
   76: 
   77: /*
   78:  * Structure nl_parse_state keeps state of received route processing. Ideally,
   79:  * we could just independently parse received Netlink messages and immediately
   80:  * propagate received routes to the rest of BIRD, but older Linux kernel (before
   81:  * version 4.11) represents and announces IPv6 ECMP routes not as one route with
   82:  * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
   83:  * routes with the same prefix. More recent kernels work as with IPv4.
   84:  *
   85:  * Therefore, BIRD keeps currently processed route in nl_parse_state structure
   86:  * and postpones its propagation until we expect it to be final; i.e., when
   87:  * non-matching route is received or when the scan ends. When another matching
   88:  * route is received, it is merged with the already processed route to form an
   89:  * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
   90:  * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
   91:  * routes with RTA_MULTIPATH set are just considered non-matching.
   92:  *
   93:  * This is ignored for asynchronous notifications (every notification is handled
   94:  * as a separate route). It is not an issue for our routes, as we ignore such
   95:  * notifications anyways. But importing alien IPv6 ECMP routes does not work
   96:  * properly with older kernels.
   97:  *
   98:  * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
   99:  * for the same prefix.
  100:  */
  101: 
  102: struct nl_parse_state
  103: {
  104:   struct linpool *pool;
  105:   int scan;
  106:   int merge;
  107: 
  108:   net *net;
  109:   rta *attrs;
  110:   struct krt_proto *proto;
  111:   s8 new;
  112:   s8 krt_src;
  113:   u8 krt_type;
  114:   u8 krt_proto;
  115:   u32 krt_metric;
  116: };
  117: 
  118: /*
  119:  *	Synchronous Netlink interface
  120:  */
  121: 
  122: struct nl_sock
  123: {
  124:   int fd;
  125:   u32 seq;
  126:   byte *rx_buffer;			/* Receive buffer */
  127:   struct nlmsghdr *last_hdr;		/* Recently received packet */
  128:   uint last_size;
  129: };
  130: 
  131: #define NL_RX_SIZE 8192
  132: 
  133: #define NL_OP_DELETE	0
  134: #define NL_OP_ADD	(NLM_F_CREATE|NLM_F_EXCL)
  135: #define NL_OP_REPLACE	(NLM_F_CREATE|NLM_F_REPLACE)
  136: #define NL_OP_APPEND	(NLM_F_CREATE|NLM_F_APPEND)
  137: 
  138: static linpool *nl_linpool;
  139: 
  140: static struct nl_sock nl_scan = {.fd = -1};	/* Netlink socket for synchronous scan */
  141: static struct nl_sock nl_req  = {.fd = -1};	/* Netlink socket for requests */
  142: 
  143: static void
  144: nl_open_sock(struct nl_sock *nl)
  145: {
  146:   if (nl->fd < 0)
  147:     {
  148:       nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  149:       if (nl->fd < 0)
  150: 	die("Unable to open rtnetlink socket: %m");
  151:       nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
  152:       nl->rx_buffer = xmalloc(NL_RX_SIZE);
  153:       nl->last_hdr = NULL;
  154:       nl->last_size = 0;
  155:     }
  156: }
  157: 
  158: static void
  159: nl_open(void)
  160: {
  161:   nl_open_sock(&nl_scan);
  162:   nl_open_sock(&nl_req);
  163: }
  164: 
  165: static void
  166: nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
  167: {
  168:   struct sockaddr_nl sa;
  169: 
  170:   memset(&sa, 0, sizeof(sa));
  171:   sa.nl_family = AF_NETLINK;
  172:   nh->nlmsg_pid = 0;
  173:   nh->nlmsg_seq = ++(nl->seq);
  174:   if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
  175:     die("rtnetlink sendto: %m");
  176:   nl->last_hdr = NULL;
  177: }
  178: 
  179: static void
  180: nl_request_dump(int af, int cmd)
  181: {
  182:   struct {
  183:     struct nlmsghdr nh;
  184:     struct rtgenmsg g;
  185:   } req = {
  186:     .nh.nlmsg_type = cmd,
  187:     .nh.nlmsg_len = sizeof(req),
  188:     .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
  189:     .g.rtgen_family = af
  190:   };
  191:   nl_send(&nl_scan, &req.nh);
  192: }
  193: 
  194: static struct nlmsghdr *
  195: nl_get_reply(struct nl_sock *nl)
  196: {
  197:   for(;;)
  198:     {
  199:       if (!nl->last_hdr)
  200: 	{
  201: 	  struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
  202: 	  struct sockaddr_nl sa;
  203: 	  struct msghdr m = {
  204: 	    .msg_name = &sa,
  205: 	    .msg_namelen = sizeof(sa),
  206: 	    .msg_iov = &iov,
  207: 	    .msg_iovlen = 1,
  208: 	  };
  209: 	  int x = recvmsg(nl->fd, &m, 0);
  210: 	  if (x < 0)
  211: 	    die("nl_get_reply: %m");
  212: 	  if (sa.nl_pid)		/* It isn't from the kernel */
  213: 	    {
  214: 	      DBG("Non-kernel packet\n");
  215: 	      continue;
  216: 	    }
  217: 	  nl->last_size = x;
  218: 	  nl->last_hdr = (void *) nl->rx_buffer;
  219: 	  if (m.msg_flags & MSG_TRUNC)
  220: 	    bug("nl_get_reply: got truncated reply which should be impossible");
  221: 	}
  222:       if (NLMSG_OK(nl->last_hdr, nl->last_size))
  223: 	{
  224: 	  struct nlmsghdr *h = nl->last_hdr;
  225: 	  nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
  226: 	  if (h->nlmsg_seq != nl->seq)
  227: 	    {
  228: 	      log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
  229: 		  h->nlmsg_seq, nl->seq);
  230: 	      continue;
  231: 	    }
  232: 	  return h;
  233: 	}
  234:       if (nl->last_size)
  235: 	log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
  236:       nl->last_hdr = NULL;
  237:     }
  238: }
  239: 
  240: static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
  241: 
  242: static int
  243: nl_error(struct nlmsghdr *h, int ignore_esrch)
  244: {
  245:   struct nlmsgerr *e;
  246:   int ec;
  247: 
  248:   if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
  249:     {
  250:       log(L_WARN "Netlink: Truncated error message received");
  251:       return ENOBUFS;
  252:     }
  253:   e = (struct nlmsgerr *) NLMSG_DATA(h);
  254:   ec = -e->error;
  255:   if (ec && !(ignore_esrch && (ec == ESRCH)))
  256:     log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
  257:   return ec;
  258: }
  259: 
  260: static struct nlmsghdr *
  261: nl_get_scan(void)
  262: {
  263:   struct nlmsghdr *h = nl_get_reply(&nl_scan);
  264: 
  265:   if (h->nlmsg_type == NLMSG_DONE)
  266:     return NULL;
  267:   if (h->nlmsg_type == NLMSG_ERROR)
  268:     {
  269:       nl_error(h, 0);
  270:       return NULL;
  271:     }
  272:   return h;
  273: }
  274: 
  275: static int
  276: nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
  277: {
  278:   struct nlmsghdr *h;
  279: 
  280:   nl_send(&nl_req, pkt);
  281:   for(;;)
  282:     {
  283:       h = nl_get_reply(&nl_req);
  284:       if (h->nlmsg_type == NLMSG_ERROR)
  285: 	break;
  286:       log(L_WARN "nl_exchange: Unexpected reply received");
  287:     }
  288:   return nl_error(h, ignore_esrch) ? -1 : 0;
  289: }
  290: 
  291: /*
  292:  *	Netlink attributes
  293:  */
  294: 
  295: static int nl_attr_len;
  296: 
  297: static void *
  298: nl_checkin(struct nlmsghdr *h, int lsize)
  299: {
  300:   nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
  301:   if (nl_attr_len < 0)
  302:     {
  303:       log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
  304:       return NULL;
  305:     }
  306:   return NLMSG_DATA(h);
  307: }
  308: 
  309: struct nl_want_attrs {
  310:   u8 defined:1;
  311:   u8 checksize:1;
  312:   u8 size;
  313: };
  314: 
  315: 
  316: #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
  317: 
  318: static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
  319:   [IFLA_IFNAME]	  = { 1, 0, 0 },
  320:   [IFLA_MTU]	  = { 1, 1, sizeof(u32) },
  321:   [IFLA_MASTER]	  = { 1, 1, sizeof(u32) },
  322:   [IFLA_WIRELESS] = { 1, 0, 0 },
  323: };
  324: 
  325: 
  326: #define BIRD_IFA_MAX  (IFA_FLAGS+1)
  327: 
  328: static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
  329:   [IFA_ADDRESS]	  = { 1, 1, sizeof(ip4_addr) },
  330:   [IFA_LOCAL]	  = { 1, 1, sizeof(ip4_addr) },
  331:   [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
  332:   [IFA_FLAGS]     = { 1, 1, sizeof(u32) },
  333: };
  334: 
  335: static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
  336:   [IFA_ADDRESS]	  = { 1, 1, sizeof(ip6_addr) },
  337:   [IFA_LOCAL]	  = { 1, 1, sizeof(ip6_addr) },
  338:   [IFA_FLAGS]	  = { 1, 1, sizeof(u32) },
  339: };
  340: 
  341: 
  342: #define BIRD_RTA_MAX  (RTA_ENCAP+1)
  343: 
  344: static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
  345:   [RTA_GATEWAY]	  = { 1, 1, sizeof(ip4_addr) },
  346:   [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  347:   [RTA_ENCAP]	  = { 1, 0, 0 },
  348: };
  349: 
  350: static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
  351:   [RTA_GATEWAY]	  = { 1, 1, sizeof(ip6_addr) },
  352:   [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  353:   [RTA_ENCAP]	  = { 1, 0, 0 },
  354: };
  355: 
  356: #ifdef HAVE_MPLS_KERNEL
  357: static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
  358:   [RTA_DST]       = { 1, 0, 0 },
  359: };
  360: #endif
  361: 
  362: static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
  363:   [RTA_DST]	  = { 1, 1, sizeof(ip4_addr) },
  364:   [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  365:   [RTA_GATEWAY]	  = { 1, 1, sizeof(ip4_addr) },
  366:   [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  367:   [RTA_PREFSRC]	  = { 1, 1, sizeof(ip4_addr) },
  368:   [RTA_METRICS]	  = { 1, 0, 0 },
  369:   [RTA_MULTIPATH] = { 1, 0, 0 },
  370:   [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  371:   [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  372:   [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  373:   [RTA_ENCAP]	  = { 1, 0, 0 },
  374: };
  375: 
  376: static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
  377:   [RTA_DST]	  = { 1, 1, sizeof(ip6_addr) },
  378:   [RTA_SRC]	  = { 1, 1, sizeof(ip6_addr) },
  379:   [RTA_IIF]	  = { 1, 1, sizeof(u32) },
  380:   [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  381:   [RTA_GATEWAY]	  = { 1, 1, sizeof(ip6_addr) },
  382:   [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  383:   [RTA_PREFSRC]	  = { 1, 1, sizeof(ip6_addr) },
  384:   [RTA_METRICS]	  = { 1, 0, 0 },
  385:   [RTA_MULTIPATH] = { 1, 0, 0 },
  386:   [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  387:   [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  388:   [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
  389:   [RTA_ENCAP]	  = { 1, 0, 0 },
  390: };
  391: 
  392: #ifdef HAVE_MPLS_KERNEL
  393: static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
  394:   [RTA_DST]	  = { 1, 1, sizeof(u32) },
  395:   [RTA_IIF]	  = { 1, 1, sizeof(u32) },
  396:   [RTA_OIF]	  = { 1, 1, sizeof(u32) },
  397:   [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
  398:   [RTA_METRICS]	  = { 1, 0, 0 },
  399:   [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
  400:   [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
  401:   [RTA_VIA]	  = { 1, 0, 0 },
  402:   [RTA_NEWDST]	  = { 1, 0, 0 },
  403: };
  404: #endif
  405: 
  406: 
  407: static int
  408: nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
  409: {
  410:   int max = ksize / sizeof(struct rtattr *);
  411:   bzero(k, ksize);
  412: 
  413:   for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
  414:     {
  415:       if ((a->rta_type >= max) || !want[a->rta_type].defined)
  416: 	continue;
  417: 
  418:       if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
  419: 	{
  420: 	  log(L_ERR "nl_parse_attrs: Malformed attribute received");
  421: 	  return 0;
  422: 	}
  423: 
  424:       k[a->rta_type] = a;
  425:     }
  426: 
  427:   if (nl_attr_len)
  428:     {
  429:       log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
  430:       return 0;
  431:     }
  432: 
  433:   return 1;
  434: }
  435: 
  436: static inline u16 rta_get_u16(struct rtattr *a)
  437: { return *(u16 *) RTA_DATA(a); }
  438: 
  439: static inline u32 rta_get_u32(struct rtattr *a)
  440: { return *(u32 *) RTA_DATA(a); }
  441: 
  442: static inline ip4_addr rta_get_ip4(struct rtattr *a)
  443: { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
  444: 
  445: static inline ip6_addr rta_get_ip6(struct rtattr *a)
  446: { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
  447: 
  448: static inline ip_addr rta_get_ipa(struct rtattr *a)
  449: {
  450:   if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
  451:     return ipa_from_ip4(rta_get_ip4(a));
  452:   else
  453:     return ipa_from_ip6(rta_get_ip6(a));
  454: }
  455: 
  456: #ifdef HAVE_MPLS_KERNEL
  457: static inline ip_addr rta_get_via(struct rtattr *a)
  458: {
  459:   struct rtvia *v = RTA_DATA(a);
  460:   switch(v->rtvia_family) {
  461:     case AF_INET:  return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
  462:     case AF_INET6: return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
  463:   }
  464:   return IPA_NONE;
  465: }
  466: 
  467: static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];
  468: static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
  469: {
  470:   if (!a)
  471:     return 0;
  472: 
  473:   if (RTA_PAYLOAD(a) % 4)
  474:     log(L_WARN "KRT: Strange length of received MPLS stack: %u", RTA_PAYLOAD(a));
  475: 
  476:   int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);
  477: 
  478:   if (labels < 0)
  479:   {
  480:     log(L_WARN "KRT: Too long MPLS stack received, ignoring");
  481:     labels = 0;
  482:   }
  483: 
  484:   return labels;
  485: }
  486: #endif
  487: 
  488: struct rtattr *
  489: nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
  490: {
  491:   uint pos = NLMSG_ALIGN(h->nlmsg_len);
  492:   uint len = RTA_LENGTH(dlen);
  493: 
  494:   if (pos + len > bufsize)
  495:     bug("nl_add_attr: packet buffer overflow");
  496: 
  497:   struct rtattr *a = (struct rtattr *)((char *)h + pos);
  498:   a->rta_type = code;
  499:   a->rta_len = len;
  500:   h->nlmsg_len = pos + len;
  501: 
  502:   if (dlen > 0)
  503:     memcpy(RTA_DATA(a), data, dlen);
  504: 
  505:   return a;
  506: }
  507: 
  508: static inline struct rtattr *
  509: nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
  510: {
  511:   return nl_add_attr(h, bufsize, code, NULL, 0);
  512: }
  513: 
  /*
   * Finish the nested attribute @a: its length is set to span everything
   * from @a up to the current (aligned) end of message @h.
   */
  static inline void
  nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
  {
    char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);
    a->rta_len = msg_end - (char *) a;
  }
  519: 
  520: static inline void
  521: nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
  522: {
  523:   nl_add_attr(h, bufsize, code, &data, 2);
  524: }
  525: 
  526: static inline void
  527: nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
  528: {
  529:   nl_add_attr(h, bufsize, code, &data, 4);
  530: }
  531: 
  532: static inline void
  533: nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
  534: {
  535:   ip4 = ip4_hton(ip4);
  536:   nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
  537: }
  538: 
  539: static inline void
  540: nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
  541: {
  542:   ip6 = ip6_hton(ip6);
  543:   nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
  544: }
  545: 
  546: static inline void
  547: nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
  548: {
  549:   if (ipa_is_ip4(ipa))
  550:     nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
  551:   else
  552:     nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
  553: }
  554: 
  555: #ifdef HAVE_MPLS_KERNEL
  556: static inline void
  557: nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
  558: {
  559:   char buf[len*4];
  560:   mpls_put(buf, len, stack);
  561:   nl_add_attr(h, bufsize, code, buf, len*4);
  562: }
  563: 
  564: static inline void
  565: nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
  566: {
  567:   nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);
  568: 
  569:   struct rtattr *nest = nl_open_attr(h, bufsize, RTA_ENCAP);
  570:   nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
  571:   nl_close_attr(h, nest);
  572: }
  573: 
  574: static inline void
  575: nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
  576: {
  577:   struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
  578: 
  579:   if (ipa_is_ip4(ipa))
  580:   {
  581:     via->rtvia_family = AF_INET;
  582:     put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
  583:     nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 4);
  584:   }
  585:   else
  586:   {
  587:     via->rtvia_family = AF_INET6;
  588:     put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
  589:     nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + 16);
  590:   }
  591: }
  592: #endif
  593: 
  594: static inline struct rtnexthop *
  595: nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
  596: {
  597:   uint pos = NLMSG_ALIGN(h->nlmsg_len);
  598:   uint len = RTNH_LENGTH(0);
  599: 
  600:   if (pos + len > bufsize)
  601:     bug("nl_open_nexthop: packet buffer overflow");
  602: 
  603:   h->nlmsg_len = pos + len;
  604: 
  605:   return (void *)h + pos;
  606: }
  607: 
  /*
   * Finish the nexthop entry @nh: its length is set to span everything from
   * @nh up to the current (aligned) end of message @h.
   */
  static inline void
  nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
  {
    char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);
    nh->rtnh_len = msg_end - (char *) nh;
  }
  613: 
  614: static inline void
  615: nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
  616: {
  617: #ifdef HAVE_MPLS_KERNEL
  618:   if (nh->labels > 0)
  619:     if (af == AF_MPLS)
  620:       nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
  621:     else
  622:       nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
  623: 
  624:   if (ipa_nonzero(nh->gw))
  625:     if (af == AF_MPLS)
  626:       nl_add_attr_via(h, bufsize, nh->gw);
  627:     else
  628:       nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
  629: #else
  630: 
  631:   if (ipa_nonzero(nh->gw))
  632:     nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
  633: #endif
  634: }
  635: 
  636: static void
  637: nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
  638: {
  639:   struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
  640: 
  641:   for (; nh; nh = nh->next)
  642:   {
  643:     struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
  644: 
  645:     rtnh->rtnh_flags = 0;
  646:     rtnh->rtnh_hops = nh->weight;
  647:     rtnh->rtnh_ifindex = nh->iface->index;
  648: 
  649:     nl_add_nexthop(h, bufsize, nh, af);
  650: 
  651:     if (nh->flags & RNF_ONLINK)
  652:       rtnh->rtnh_flags |= RTNH_F_ONLINK;
  653: 
  654:     nl_close_nexthop(h, rtnh);
  655:   }
  656: 
  657:   nl_close_attr(h, a);
  658: }
  659: 
  660: static struct nexthop *
  661: nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
  662: {
  663:   struct rtattr *a[BIRD_RTA_MAX];
  664:   struct rtnexthop *nh = RTA_DATA(ra);
  665:   struct nexthop *rv, *first, **last;
  666:   unsigned len = RTA_PAYLOAD(ra);
  667: 
  668:   first = NULL;
  669:   last = &first;
  670: 
  671:   while (len)
  672:     {
  673:       /* Use RTNH_OK(nh,len) ?? */
  674:       if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
  675: 	return NULL;
  676: 
  677:       *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
  678:       last = &(rv->next);
  679: 
  680:       rv->weight = nh->rtnh_hops;
  681:       rv->iface = if_find_by_index(nh->rtnh_ifindex);
  682:       if (!rv->iface)
  683: 	return NULL;
  684: 
  685:       /* Nonexistent RTNH_PAYLOAD ?? */
  686:       nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
  687:       switch (af)
  688:         {
  689: 	case AF_INET:
  690: 	  if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
  691: 	    return NULL;
  692: 	  break;
  693: 
  694: 	case AF_INET6:
  695: 	  if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
  696: 	    return NULL;
  697: 	  break;
  698: 
  699: 	default:
  700: 	  return NULL;
  701: 	}
  702: 
  703:       if (a[RTA_GATEWAY])
  704: 	{
  705: 	  rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
  706: 
  707: 	  if (nh->rtnh_flags & RTNH_F_ONLINK)
  708: 	    rv->flags |= RNF_ONLINK;
  709: 
  710: 	  neighbor *nbr;
  711: 	  nbr = neigh_find(&p->p, rv->gw, rv->iface,
  712: 			   (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
  713: 	  if (!nbr || (nbr->scope == SCOPE_HOST))
  714: 	    return NULL;
  715: 	}
  716:       else
  717: 	rv->gw = IPA_NONE;
  718: 
  719: #ifdef HAVE_MPLS_KERNEL
  720:       if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
  721:       {
  722: 	if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
  723: 	  log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
  724: 	  return NULL;
  725: 	}
  726: 
  727: 	struct rtattr *enca[BIRD_RTA_MAX];
  728: 	nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
  729: 	nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
  730: 	rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
  731:       }
  732: #endif
  733: 
  734: 
  735:       len -= NLMSG_ALIGN(nh->rtnh_len);
  736:       nh = RTNH_NEXT(nh);
  737:     }
  738: 
  739:   /* Ensure nexthops are sorted to satisfy nest invariant */
  740:   if (!nexthop_is_sorted(first))
  741:     first = nexthop_sort(first);
  742: 
  743:   return first;
  744: }
  745: 
  746: static void
  747: nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
  748: {
  749:   struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
  750:   int t;
  751: 
  752:   for (t = 1; t < max; t++)
  753:     if (metrics[0] & (1 << t))
  754:       nl_add_attr_u32(h, bufsize, t, metrics[t]);
  755: 
  756:   nl_close_attr(h, a);
  757: }
  758: 
  759: static int
  760: nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
  761: {
  762:   struct rtattr *a = RTA_DATA(hdr);
  763:   int len = RTA_PAYLOAD(hdr);
  764: 
  765:   metrics[0] = 0;
  766:   for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
  767:   {
  768:     if (a->rta_type == RTA_UNSPEC)
  769:       continue;
  770: 
  771:     if (a->rta_type >= max)
  772:       continue;
  773: 
  774:     if (RTA_PAYLOAD(a) != 4)
  775:       return -1;
  776: 
  777:     metrics[0] |= 1 << a->rta_type;
  778:     metrics[a->rta_type] = rta_get_u32(a);
  779:   }
  780: 
  781:   if (len > 0)
  782:     return -1;
  783: 
  784:   return 0;
  785: }
  786: 
  787: 
  788: /*
  789:  *	Scanning of interfaces
  790:  */
  791: 
  792: static void
  793: nl_parse_link(struct nlmsghdr *h, int scan)
  794: {
  795:   struct ifinfomsg *i;
  796:   struct rtattr *a[BIRD_IFLA_MAX];
  797:   int new = h->nlmsg_type == RTM_NEWLINK;
  798:   struct iface f = {};
  799:   struct iface *ifi;
  800:   char *name;
  801:   u32 mtu, master = 0;
  802:   uint fl;
  803: 
  804:   if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
  805:     return;
  806:   if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
  807:     {
  808:       /*
  809:        * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
  810:        * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
  811:        * We simply ignore all such messages with IFLA_WIRELESS without notice.
  812:        */
  813: 
  814:       if (a[IFLA_WIRELESS])
  815: 	return;
  816: 
  817:       log(L_ERR "KIF: Malformed message received");
  818:       return;
  819:     }
  820: 
  821:   name = RTA_DATA(a[IFLA_IFNAME]);
  822:   mtu = rta_get_u32(a[IFLA_MTU]);
  823: 
  824:   if (a[IFLA_MASTER])
  825:     master = rta_get_u32(a[IFLA_MASTER]);
  826: 
  827:   ifi = if_find_by_index(i->ifi_index);
  828:   if (!new)
  829:     {
  830:       DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
  831:       if (!ifi)
  832: 	return;
  833: 
  834:       if_delete(ifi);
  835:     }
  836:   else
  837:     {
  838:       DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
  839:       if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
  840: 	if_delete(ifi);
  841: 
  842:       strncpy(f.name, name, sizeof(f.name)-1);
  843:       f.index = i->ifi_index;
  844:       f.mtu = mtu;
  845: 
  846:       f.master_index = master;
  847:       f.master = if_find_by_index(master);
  848: 
  849:       fl = i->ifi_flags;
  850:       if (fl & IFF_UP)
  851: 	f.flags |= IF_ADMIN_UP;
  852:       if (fl & IFF_LOWER_UP)
  853: 	f.flags |= IF_LINK_UP;
  854:       if (fl & IFF_LOOPBACK)		/* Loopback */
  855: 	f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
  856:       else if (fl & IFF_POINTOPOINT)	/* PtP */
  857: 	f.flags |= IF_MULTICAST;
  858:       else if (fl & IFF_BROADCAST)	/* Broadcast */
  859: 	f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
  860:       else
  861: 	f.flags |= IF_MULTIACCESS;	/* NBMA */
  862: 
  863:       if (fl & IFF_MULTICAST)
  864: 	f.flags |= IF_MULTICAST;
  865: 
  866:       ifi = if_update(&f);
  867: 
  868:       if (!scan)
  869: 	if_end_partial_update(ifi);
  870:     }
  871: }
  872: 
  873: static void
  874: nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
  875: {
  876:   struct rtattr *a[BIRD_IFA_MAX];
  877:   struct iface *ifi;
  878:   u32 ifa_flags;
  879:   int scope;
  880: 
  881:   if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
  882:     return;
  883: 
  884:   if (!a[IFA_LOCAL])
  885:     {
  886:       log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
  887:       return;
  888:     }
  889:   if (!a[IFA_ADDRESS])
  890:     {
  891:       log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
  892:       return;
  893:     }
  894: 
  895:   ifi = if_find_by_index(i->ifa_index);
  896:   if (!ifi)
  897:     {
  898:       log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
  899:       return;
  900:     }
  901: 
  902:   if (a[IFA_FLAGS])
  903:     ifa_flags = rta_get_u32(a[IFA_FLAGS]);
  904:   else
  905:     ifa_flags = i->ifa_flags;
  906: 
  907:   struct ifa ifa;
  908:   bzero(&ifa, sizeof(ifa));
  909:   ifa.iface = ifi;
  910:   if (ifa_flags & IFA_F_SECONDARY)
  911:     ifa.flags |= IA_SECONDARY;
  912: 
  913:   ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
  914: 
  915:   if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
  916:     {
  917:       log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
  918:       new = 0;
  919:     }
  920:   if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
  921:     {
  922:       ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
  923:       net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
  924: 
  925:       /* It is either a host address or a peer address */
  926:       if (ipa_equal(ifa.ip, ifa.brd))
  927: 	ifa.flags |= IA_HOST;
  928:       else
  929: 	{
  930: 	  ifa.flags |= IA_PEER;
  931: 	  ifa.opposite = ifa.brd;
  932: 	}
  933:     }
  934:   else
  935:     {
  936:       net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
  937:       net_normalize(&ifa.prefix);
  938: 
  939:       if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
  940: 	ifa.opposite = ipa_opposite_m1(ifa.ip);
  941: 
  942:       if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
  943: 	ifa.opposite = ipa_opposite_m2(ifa.ip);
  944: 
  945:       if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
  946: 	{
  947: 	  ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
  948: 	  ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
  949: 
  950: 	  if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
  951: 	    ifa.brd = ipa_from_ip4(xbrd);
  952: 	  else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
  953: 	    {
  954: 	      log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
  955: 	      ifa.brd = ipa_from_ip4(ybrd);
  956: 	    }
  957: 	}
  958:     }
  959: 
  960:   scope = ipa_classify(ifa.ip);
  961:   if (scope < 0)
  962:     {
  963:       log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
  964:       return;
  965:     }
  966:   ifa.scope = scope & IADDR_SCOPE_MASK;
  967: 
  968:   DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
  969:       ifi->index, ifi->name,
  970:       new ? "added" : "removed",
  971:       ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
  972: 
  973:   if (new)
  974:     ifa_update(&ifa);
  975:   else
  976:     ifa_delete(&ifa);
  977: 
  978:   if (!scan)
  979:     if_end_partial_update(ifi);
  980: }
  981: 
/*
 * Process one IPv6 interface address message.
 *
 * @i:    address message header; its attributes are read via IFA_RTA(i)
 * @scan: zero for asynchronous notifications, in which case the function
 *        finishes with if_end_partial_update() on the affected interface
 * @new:  nonzero to add/update the address (ifa_update), zero to remove it
 *        (ifa_delete)
 *
 * The message is dropped when attribute parsing fails, when IFA_ADDRESS is
 * missing, when the interface index is unknown, when the address is still
 * tentative, or when ipa_classify() rejects the address.
 */
static void
nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
{
  struct rtattr *a[BIRD_IFA_MAX];
  struct iface *ifi;
  u32 ifa_flags;
  int scope;

  if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
    return;

  if (!a[IFA_ADDRESS])
    {
      log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
      return;
    }

  ifi = if_find_by_index(i->ifa_index);
  if (!ifi)
    {
      log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
      return;
    }

  /* Prefer the 32-bit IFA_FLAGS attribute; fall back to the header field */
  if (a[IFA_FLAGS])
    ifa_flags = rta_get_u32(a[IFA_FLAGS]);
  else
    ifa_flags = i->ifa_flags;

  struct ifa ifa;
  bzero(&ifa, sizeof(ifa));
  ifa.iface = ifi;
  if (ifa_flags & IFA_F_SECONDARY)
    ifa.flags |= IA_SECONDARY;

  /* Ignore tentative addresses silently */
  if (ifa_flags & IFA_F_TENTATIVE)
    return;

  /* IFA_LOCAL can be unset for IPv6 interfaces */
  ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);

  /* An out-of-range prefix length is logged and the message is then handled as a removal */
  if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
    {
      log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
      new = 0;
    }
  if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
    {
      /* Full-length prefix: IFA_ADDRESS supplies both the prefix and the brd field */
      ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
      net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);

      /* It is either a host address or a peer address */
      if (ipa_equal(ifa.ip, ifa.brd))
	ifa.flags |= IA_HOST;
      else
	{
	  ifa.flags |= IA_PEER;
	  ifa.opposite = ifa.brd;
	}
    }
  else
    {
      /* Shorter prefix: derive the network from the local address */
      net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
      net_normalize(&ifa.prefix);

      /* One bit short of full length: record the other address of the pair */
      if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
	ifa.opposite = ipa_opposite_m1(ifa.ip);
    }

  /* A negative classification marks the address as unusable */
  scope = ipa_classify(ifa.ip);
  if (scope < 0)
    {
      log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
      return;
    }
  ifa.scope = scope & IADDR_SCOPE_MASK;

  DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
      ifi->index, ifi->name,
      new ? "added" : "removed",
      ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);

  if (new)
    ifa_update(&ifa);
  else
    ifa_delete(&ifa);

  /* Outside of a full scan, close the per-interface update here */
  if (!scan)
    if_end_partial_update(ifi);
}
 1073: 
 1074: static void
 1075: nl_parse_addr(struct nlmsghdr *h, int scan)
 1076: {
 1077:   struct ifaddrmsg *i;
 1078: 
 1079:   if (!(i = nl_checkin(h, sizeof(*i))))
 1080:     return;
 1081: 
 1082:   int new = (h->nlmsg_type == RTM_NEWADDR);
 1083: 
 1084:   switch (i->ifa_family)
 1085:     {
 1086:       case AF_INET:
 1087: 	return nl_parse_addr4(i, scan, new);
 1088: 
 1089:       case AF_INET6:
 1090: 	return nl_parse_addr6(i, scan, new);
 1091:     }
 1092: }
 1093: 
 1094: void
 1095: kif_do_scan(struct kif_proto *p UNUSED)
 1096: {
 1097:   struct nlmsghdr *h;
 1098: 
 1099:   if_start_update();
 1100: 
 1101:   nl_request_dump(AF_UNSPEC, RTM_GETLINK);
 1102:   while (h = nl_get_scan())
 1103:     if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
 1104:       nl_parse_link(h, 1);
 1105:     else
 1106:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
 1107: 
 1108:   /* Re-resolve master interface for slaves */
 1109:   struct iface *i;
 1110:   WALK_LIST(i, iface_list)
 1111:     if (i->master_index)
 1112:     {
 1113:       struct iface f = {
 1114: 	.flags = i->flags,
 1115: 	.mtu = i->mtu,
 1116: 	.index = i->index,
 1117: 	.master_index = i->master_index,
 1118: 	.master = if_find_by_index(i->master_index)
 1119:       };
 1120: 
 1121:       if (f.master != i->master)
 1122:       {
 1123: 	memcpy(f.name, i->name, sizeof(f.name));
 1124: 	if_update(&f);
 1125:       }
 1126:     }
 1127: 
 1128:   nl_request_dump(AF_INET, RTM_GETADDR);
 1129:   while (h = nl_get_scan())
 1130:     if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
 1131:       nl_parse_addr(h, 1);
 1132:     else
 1133:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
 1134: 
 1135:   nl_request_dump(AF_INET6, RTM_GETADDR);
 1136:   while (h = nl_get_scan())
 1137:     if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
 1138:       nl_parse_addr(h, 1);
 1139:     else
 1140:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
 1141: 
 1142:   if_end_update();
 1143: }
 1144: 
 1145: /*
 1146:  *	Routes
 1147:  */
 1148: 
 1149: static inline u32
 1150: krt_table_id(struct krt_proto *p)
 1151: {
 1152:   return KRT_CF->sys.table_id;
 1153: }
 1154: 
/*
 * Hash table of running kernel protocol instances, keyed by the pair
 * (address family, kernel table id). Entries are inserted in krt_sys_start()
 * and removed in krt_sys_shutdown(); nl_parse_route() looks up the instance
 * that owns a received route.
 */
static HASH(struct krt_proto) nl_table_map;

/* RTH_* definitions below parametrize the HASH_* macros for nl_table_map */
#define RTH_KEY(p)		p->af, krt_table_id(p)
#define RTH_NEXT(p)		p->sys.hash_next
#define RTH_EQ(a1,i1,a2,i2)	a1 == a2 && i1 == i2
#define RTH_FN(a,i)		a ^ u32_hash(i)

#define RTH_REHASH		rth_rehash
#define RTH_PARAMS		/8, *2, 2, 2, 6, 20

HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
 1166: 
 1167: int
 1168: krt_capable(rte *e)
 1169: {
 1170:   rta *a = e->attrs;
 1171: 
 1172:   switch (a->dest)
 1173:   {
 1174:     case RTD_UNICAST:
 1175:     case RTD_BLACKHOLE:
 1176:     case RTD_UNREACHABLE:
 1177:     case RTD_PROHIBIT:
 1178:       return 1;
 1179: 
 1180:     default:
 1181:       return 0;
 1182:   }
 1183: }
 1184: 
 1185: static inline int
 1186: nh_bufsize(struct nexthop *nh)
 1187: {
 1188:   int rv = 0;
 1189:   for (; nh != NULL; nh = nh->next)
 1190:     rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
 1191:   return rv;
 1192: }
 1193: 
/*
 * Build one RTM_NEWROUTE / RTM_DELROUTE request for route @e and send it
 * through nl_exchange().
 *
 * @p:    kernel protocol instance (supplies address family, table, metric)
 * @e:    route whose network and attributes are encoded
 * @op:   NL_OP_* value; it is OR-ed into nlmsg_flags, and a zero value
 *        selects RTM_DELROUTE instead of RTM_NEWROUTE
 * @dest: RTD_* destination type to encode; RTD_NONE adds no type or next hop
 * @nh:   next hop (list) to encode for RTD_UNICAST; the delete path in this
 *        file passes NULL together with RTD_NONE
 *
 * Returns the result of nl_exchange(); callers in this file treat a negative
 * value as an error.
 */
static int
nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
{
  eattr *ea;
  net *net = e->net;
  rta *a = e->attrs;
  ea_list *eattrs = a->eattrs;
  /* Attribute space: fixed part, per-metric part and room for all next hops */
  int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
  u32 priority = 0;

  /* Request layout: netlink header, route header, then attributes */
  struct {
    struct nlmsghdr h;
    struct rtmsg r;
    char buf[0];
  } *r;

  /* rsize is the limit handed to every nl_add_* call below */
  int rsize = sizeof(*r) + bufsize;
  r = alloca(rsize);

  DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);

  bzero(&r->h, sizeof(r->h));
  bzero(&r->r, sizeof(r->r));
  r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
  r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
  r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;

  r->r.rtm_family = p->af;
  r->r.rtm_dst_len = net_pxlen(net->n.addr);
  r->r.rtm_protocol = RTPROT_BIRD;
  r->r.rtm_scope = RT_SCOPE_NOWHERE;
#ifdef HAVE_MPLS_KERNEL
  if (p->af == AF_MPLS)
  {
    /*
     * Kernel MPLS code is a bit picky. We must:
     * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
     * 2) Never use RTA_PRIORITY
     */

    u32 label = net_mpls(net->n.addr);
    nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
    r->r.rtm_scope = RT_SCOPE_UNIVERSE;
    r->r.rtm_type = RTN_UNICAST;
  }
  else
#endif
  {
    nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));

    /* Add source address for IPv6 SADR routes */
    if (net->n.addr->type == NET_IP6_SADR)
    {
      /*
       * NOTE(review): this local 'a' shadows the rta pointer 'a' above, and
       * the cast of &net->n.addr presumably relies on n.addr being an
       * embedded array (so its address equals the address data) - confirm.
       */
      net_addr_ip6_sadr *a = (void *) &net->n.addr;
      nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
      r->r.rtm_src_len = a->src_pxlen;
    }
  }

  /*
   * Strange behavior for RTM_DELROUTE:
   * 1) rtm_family is ignored in IPv6, works for IPv4
   * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
   * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
   */

  /* Table ids below 256 go into the header field; larger ones need RTA_TABLE */
  if (krt_table_id(p) < 256)
    r->r.rtm_table = krt_table_id(p);
  else
    nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));

  /*
   * Priority selection, first match wins: none for MPLS, the stored kernel
   * metric for RTS_DUMMY routes, the configured metric, and finally (not for
   * deletes) the route's EA_KRT_METRIC attribute.
   */
  if (p->af == AF_MPLS)
    priority = 0;
  else if (a->source == RTS_DUMMY)
    priority = e->u.krt.metric;
  else if (KRT_CF->sys.metric)
    priority = KRT_CF->sys.metric;
  else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
    priority = ea->u.data;

  /* A zero priority is never sent as an attribute */
  if (priority)
    nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);

  /* For route delete, we do not specify remaining route attributes */
  if (op == NL_OP_DELETE)
    goto dest;

  /* Default scope is LINK for device routes, UNIVERSE otherwise */
  if (p->af == AF_MPLS)
    r->r.rtm_scope = RT_SCOPE_UNIVERSE;
  else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
    r->r.rtm_scope = ea->u.data;
  else
    r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;

  if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
    nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);

  if (ea = ea_find(eattrs, EA_KRT_REALM))
    nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);


  /* metrics[0] is a bitmask of the metric ids present; metrics[id] holds the value */
  u32 metrics[KRT_METRICS_MAX];
  metrics[0] = 0;

  struct ea_walk_state ews = { .eattrs = eattrs };
  while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
  {
    int id = ea->id - EA_KRT_METRICS;
    metrics[0] |= 1 << id;
    metrics[id] = ea->u.data;
  }

  if (metrics[0])
    nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);


dest:
  switch (dest)
    {
    case RTD_UNICAST:
      r->r.rtm_type = RTN_UNICAST;
      /* Several next hops go into one multipath attribute unless krt_ecmp6(p) holds */
      if (nh->next && !krt_ecmp6(p))
	nl_add_multipath(&r->h, rsize, nh, p->af);
      else
      {
	nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
	nl_add_nexthop(&r->h, rsize, nh, p->af);

	if (nh->flags & RNF_ONLINK)
	  r->r.rtm_flags |= RTNH_F_ONLINK;
      }
      break;
    case RTD_BLACKHOLE:
      r->r.rtm_type = RTN_BLACKHOLE;
      break;
    case RTD_UNREACHABLE:
      r->r.rtm_type = RTN_UNREACHABLE;
      break;
    case RTD_PROHIBIT:
      r->r.rtm_type = RTN_PROHIBIT;
      break;
    case RTD_NONE:
      /* Used by the delete path: no type or next hop is added */
      break;
    default:
      bug("krt_capable inconsistent with nl_send_route");
    }

  /* Ignore missing for DELETE */
  return nl_exchange(&r->h, (op == NL_OP_DELETE));
}
 1345: 
 1346: static inline int
 1347: nl_add_rte(struct krt_proto *p, rte *e)
 1348: {
 1349:   rta *a = e->attrs;
 1350:   int err = 0;
 1351: 
 1352:   if (krt_ecmp6(p) && a->nh.next)
 1353:   {
 1354:     struct nexthop *nh = &(a->nh);
 1355: 
 1356:     err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
 1357:     if (err < 0)
 1358:       return err;
 1359: 
 1360:     for (nh = nh->next; nh; nh = nh->next)
 1361:       err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
 1362: 
 1363:     return err;
 1364:   }
 1365: 
 1366:   return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
 1367: }
 1368: 
 1369: static inline int
 1370: nl_delete_rte(struct krt_proto *p, rte *e)
 1371: {
 1372:   int err = 0;
 1373: 
 1374:   /* For IPv6, we just repeatedly request DELETE until we get error */
 1375:   do
 1376:     err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
 1377:   while (krt_ecmp6(p) && !err);
 1378: 
 1379:   return err;
 1380: }
 1381: 
 1382: static inline int
 1383: nl_replace_rte(struct krt_proto *p, rte *e)
 1384: {
 1385:   rta *a = e->attrs;
 1386:   return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
 1387: }
 1388: 
 1389: 
 1390: void
 1391: krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old)
 1392: {
 1393:   int err = 0;
 1394: 
 1395:   /*
 1396:    * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for
 1397:    * matching rtm_protocol, but that is OK when dedicated priority is used.
 1398:    *
 1399:    * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP
 1400:    * and with some kernel versions ECMP replace crashes kernel. Would need more
 1401:    * testing and checks for kernel versions.
 1402:    *
 1403:    * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the
 1404:    * old route value, so we do not try to optimize IPv6 ECMP reconfigurations.
 1405:    */
 1406: 
 1407:   if (krt_ipv4(p) && old && new)
 1408:   {
 1409:     err = nl_replace_rte(p, new);
 1410:   }
 1411:   else
 1412:   {
 1413:     if (old)
 1414:       nl_delete_rte(p, old);
 1415: 
 1416:     if (new)
 1417:       err = nl_add_rte(p, new);
 1418:   }
 1419: 
 1420:   if (err < 0)
 1421:     n->n.flags |= KRF_SYNC_ERROR;
 1422:   else
 1423:     n->n.flags &= ~KRF_SYNC_ERROR;
 1424: }
 1425: 
 1426: static int
 1427: nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family)
 1428: {
 1429:   /* Route merging is used for IPv6 scans */
 1430:   if (!s->scan || (rtm_family != AF_INET6))
 1431:     return 0;
 1432: 
 1433:   /* Saved and new route must have same network, proto/table, and priority */
 1434:   if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
 1435:     return 0;
 1436: 
 1437:   /* Both must be regular unicast routes */
 1438:   if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
 1439:     return 0;
 1440: 
 1441:   return 1;
 1442: }
 1443: 
 1444: static void
 1445: nl_announce_route(struct nl_parse_state *s)
 1446: {
 1447:   rte *e = rte_get_temp(s->attrs);
 1448:   e->net = s->net;
 1449:   e->u.krt.src = s->krt_src;
 1450:   e->u.krt.proto = s->krt_proto;
 1451:   e->u.krt.seen = 0;
 1452:   e->u.krt.best = 0;
 1453:   e->u.krt.metric = s->krt_metric;
 1454: 
 1455:   if (s->scan)
 1456:     krt_got_route(s->proto, e);
 1457:   else
 1458:     krt_got_route_async(s->proto, e, s->new);
 1459: 
 1460:   s->net = NULL;
 1461:   s->attrs = NULL;
 1462:   s->proto = NULL;
 1463:   lp_flush(s->pool);
 1464: }
 1465: 
 1466: static inline void
 1467: nl_parse_begin(struct nl_parse_state *s, int scan)
 1468: {
 1469:   memset(s, 0, sizeof (struct nl_parse_state));
 1470:   s->pool = nl_linpool;
 1471:   s->scan = scan;
 1472: }
 1473: 
 1474: static inline void
 1475: nl_parse_end(struct nl_parse_state *s)
 1476: {
 1477:   if (s->net)
 1478:     nl_announce_route(s);
 1479: }
 1480: 
 1481: 
 1482: #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
 1483: 
 1484: static void
 1485: nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
 1486: {
 1487:   struct krt_proto *p;
 1488:   struct rtmsg *i;
 1489:   struct rtattr *a[BIRD_RTA_MAX];
 1490:   int new = h->nlmsg_type == RTM_NEWROUTE;
 1491: 
 1492:   net_addr dst, src = {};
 1493:   u32 oif = ~0;
 1494:   u32 table_id;
 1495:   u32 priority = 0;
 1496:   u32 def_scope = RT_SCOPE_UNIVERSE;
 1497:   int krt_src;
 1498: 
 1499:   if (!(i = nl_checkin(h, sizeof(*i))))
 1500:     return;
 1501: 
 1502:   switch (i->rtm_family)
 1503:     {
 1504:     case AF_INET:
 1505:       if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
 1506: 	return;
 1507: 
 1508:       if (a[RTA_DST])
 1509: 	net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
 1510:       else
 1511: 	net_fill_ip4(&dst, IP4_NONE, 0);
 1512:       break;
 1513: 
 1514:     case AF_INET6:
 1515:       if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
 1516: 	return;
 1517: 
 1518:       if (a[RTA_DST])
 1519: 	net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
 1520:       else
 1521: 	net_fill_ip6(&dst, IP6_NONE, 0);
 1522: 
 1523:       if (a[RTA_SRC])
 1524: 	net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
 1525:       else
 1526: 	net_fill_ip6(&src, IP6_NONE, 0);
 1527:       break;
 1528: 
 1529: #ifdef HAVE_MPLS_KERNEL
 1530:     case AF_MPLS:
 1531:       if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
 1532: 	return;
 1533: 
 1534:       if (!a[RTA_DST])
 1535: 	SKIP("MPLS route without RTA_DST");
 1536: 
 1537:       if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
 1538: 	SKIP("MPLS route with multi-label RTA_DST");
 1539: 
 1540:       net_fill_mpls(&dst, rta_mpls_stack[0]);
 1541:       break;
 1542: #endif
 1543: 
 1544:     default:
 1545:       return;
 1546:     }
 1547: 
 1548:   if (a[RTA_OIF])
 1549:     oif = rta_get_u32(a[RTA_OIF]);
 1550: 
 1551:   if (a[RTA_TABLE])
 1552:     table_id = rta_get_u32(a[RTA_TABLE]);
 1553:   else
 1554:     table_id = i->rtm_table;
 1555: 
 1556:   /* Do we know this table? */
 1557:   p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
 1558:   if (!p)
 1559:     SKIP("unknown table %u\n", table_id);
 1560: 
 1561:   if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
 1562:     SKIP("src prefix for non-SADR channel\n");
 1563: 
 1564:   if (a[RTA_IIF])
 1565:     SKIP("IIF set\n");
 1566: 
 1567:   if (i->rtm_tos != 0)			/* We don't support TOS */
 1568:     SKIP("TOS %02x\n", i->rtm_tos);
 1569: 
 1570:   if (s->scan && !new)
 1571:     SKIP("RTM_DELROUTE in scan\n");
 1572: 
 1573:   if (a[RTA_PRIORITY])
 1574:     priority = rta_get_u32(a[RTA_PRIORITY]);
 1575: 
 1576:   int c = net_classify(&dst);
 1577:   if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
 1578:     SKIP("strange class/scope\n");
 1579: 
 1580:   switch (i->rtm_protocol)
 1581:     {
 1582:     case RTPROT_UNSPEC:
 1583:       SKIP("proto unspec\n");
 1584: 
 1585:     case RTPROT_REDIRECT:
 1586:       krt_src = KRT_SRC_REDIRECT;
 1587:       break;
 1588: 
 1589:     case RTPROT_KERNEL:
 1590:       krt_src = KRT_SRC_KERNEL;
 1591:       return;
 1592: 
 1593:     case RTPROT_BIRD:
 1594:       if (!s->scan)
 1595: 	SKIP("echo\n");
 1596:       krt_src = KRT_SRC_BIRD;
 1597:       break;
 1598: 
 1599:     case RTPROT_BOOT:
 1600:     default:
 1601:       krt_src = KRT_SRC_ALIEN;
 1602:     }
 1603: 
 1604:   net_addr *n = &dst;
 1605:   if (p->p.net_type == NET_IP6_SADR)
 1606:   {
 1607:     n = alloca(sizeof(net_addr_ip6_sadr));
 1608:     net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
 1609: 		      net6_prefix(&src), net6_pxlen(&src));
 1610:   }
 1611: 
 1612:   net *net = net_get(p->p.main_channel->table, n);
 1613: 
 1614:   if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
 1615:     nl_announce_route(s);
 1616: 
 1617:   rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
 1618:   ra->src = p->p.main_source;
 1619:   ra->source = RTS_INHERIT;
 1620:   ra->scope = SCOPE_UNIVERSE;
 1621: 
 1622:   switch (i->rtm_type)
 1623:     {
 1624:     case RTN_UNICAST:
 1625:       ra->dest = RTD_UNICAST;
 1626: 
 1627:       if (a[RTA_MULTIPATH])
 1628:         {
 1629: 	  struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
 1630: 	  if (!nh)
 1631: 	    {
 1632: 	      log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
 1633: 	      return;
 1634: 	    }
 1635: 
 1636: 	  nexthop_link(ra, nh);
 1637: 	  break;
 1638: 	}
 1639: 
 1640:       ra->nh.iface = if_find_by_index(oif);
 1641:       if (!ra->nh.iface)
 1642: 	{
 1643: 	  log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
 1644: 	  return;
 1645: 	}
 1646: 
 1647:       if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY]
 1648: #ifdef HAVE_MPLS_KERNEL
 1649: 	  || (i->rtm_family == AF_MPLS) && a[RTA_VIA]
 1650: #endif
 1651: 	  )
 1652: 	{
 1653: #ifdef HAVE_MPLS_KERNEL
 1654: 	  if (i->rtm_family == AF_MPLS)
 1655: 	    ra->nh.gw = rta_get_via(a[RTA_VIA]);
 1656: 	  else
 1657: #endif
 1658: 	    ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
 1659: 
 1660: 	  /* Silently skip strange 6to4 routes */
 1661: 	  const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
 1662: 	  if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
 1663: 	    return;
 1664: 
 1665: 	  if (i->rtm_flags & RTNH_F_ONLINK)
 1666: 	    ra->nh.flags |= RNF_ONLINK;
 1667: 
 1668: 	  neighbor *nbr;
 1669: 	  nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
 1670: 			   (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
 1671: 	  if (!nbr || (nbr->scope == SCOPE_HOST))
 1672: 	    {
 1673: 	      log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
 1674:                   ra->nh.gw);
 1675: 	      return;
 1676: 	    }
 1677: 	}
 1678: 
 1679:       break;
 1680:     case RTN_BLACKHOLE:
 1681:       ra->dest = RTD_BLACKHOLE;
 1682:       break;
 1683:     case RTN_UNREACHABLE:
 1684:       ra->dest = RTD_UNREACHABLE;
 1685:       break;
 1686:     case RTN_PROHIBIT:
 1687:       ra->dest = RTD_PROHIBIT;
 1688:       break;
 1689:     /* FIXME: What about RTN_THROW? */
 1690:     default:
 1691:       SKIP("type %d\n", i->rtm_type);
 1692:       return;
 1693:     }
 1694: 
 1695: #ifdef HAVE_MPLS_KERNEL
 1696:   if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
 1697:     ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
 1698: 
 1699:   if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
 1700:     {
 1701:       switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
 1702: 	{
 1703: 	  case LWTUNNEL_ENCAP_MPLS:
 1704: 	    {
 1705: 	      struct rtattr *enca[BIRD_RTA_MAX];
 1706: 	      nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
 1707: 	      nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
 1708: 	      ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
 1709: 	      break;
 1710: 	    }
 1711: 	  default:
 1712: 	    SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
 1713: 	    break;
 1714: 	}
 1715:     }
 1716: #endif
 1717: 
 1718:   if (i->rtm_scope != def_scope)
 1719:     {
 1720:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
 1721:       ea->next = ra->eattrs;
 1722:       ra->eattrs = ea;
 1723:       ea->flags = EALF_SORTED;
 1724:       ea->count = 1;
 1725:       ea->attrs[0].id = EA_KRT_SCOPE;
 1726:       ea->attrs[0].flags = 0;
 1727:       ea->attrs[0].type = EAF_TYPE_INT;
 1728:       ea->attrs[0].u.data = i->rtm_scope;
 1729:     }
 1730: 
 1731:   if (a[RTA_PREFSRC])
 1732:     {
 1733:       ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
 1734: 
 1735:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
 1736:       ea->next = ra->eattrs;
 1737:       ra->eattrs = ea;
 1738:       ea->flags = EALF_SORTED;
 1739:       ea->count = 1;
 1740:       ea->attrs[0].id = EA_KRT_PREFSRC;
 1741:       ea->attrs[0].flags = 0;
 1742:       ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
 1743: 
 1744:       struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
 1745:       ad->length = sizeof(ps);
 1746:       memcpy(ad->data, &ps, sizeof(ps));
 1747: 
 1748:       ea->attrs[0].u.ptr = ad;
 1749:     }
 1750: 
 1751:   if (a[RTA_FLOW])
 1752:     {
 1753:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
 1754:       ea->next = ra->eattrs;
 1755:       ra->eattrs = ea;
 1756:       ea->flags = EALF_SORTED;
 1757:       ea->count = 1;
 1758:       ea->attrs[0].id = EA_KRT_REALM;
 1759:       ea->attrs[0].flags = 0;
 1760:       ea->attrs[0].type = EAF_TYPE_INT;
 1761:       ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
 1762:     }
 1763: 
 1764:   if (a[RTA_METRICS])
 1765:     {
 1766:       u32 metrics[KRT_METRICS_MAX];
 1767:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
 1768:       int t, n = 0;
 1769: 
 1770:       if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
 1771:         {
 1772: 	  log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
 1773: 	  return;
 1774: 	}
 1775: 
 1776:       for (t = 1; t < KRT_METRICS_MAX; t++)
 1777: 	if (metrics[0] & (1 << t))
 1778: 	  {
 1779: 	    ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
 1780: 	    ea->attrs[n].flags = 0;
 1781: 	    ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
 1782: 	    ea->attrs[n].u.data = metrics[t];
 1783: 	    n++;
 1784: 	  }
 1785: 
 1786:       if (n > 0)
 1787:         {
 1788: 	  ea->next = ra->eattrs;
 1789: 	  ea->flags = EALF_SORTED;
 1790: 	  ea->count = n;
 1791: 	  ra->eattrs = ea;
 1792: 	}
 1793:     }
 1794: 
 1795:   /*
 1796:    * Ideally, now we would send the received route to the rest of kernel code.
 1797:    * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
 1798:    * postpone it and merge next hops until the end of the sequence. Note that
 1799:    * when doing merging of next hops, we expect the new route to be unipath.
 1800:    * Otherwise, we ignore additional next hops in nexthop_insert().
 1801:    */
 1802: 
 1803:   if (!s->net)
 1804:   {
 1805:     /* Store the new route */
 1806:     s->net = net;
 1807:     s->attrs = ra;
 1808:     s->proto = p;
 1809:     s->new = new;
 1810:     s->krt_src = krt_src;
 1811:     s->krt_type = i->rtm_type;
 1812:     s->krt_proto = i->rtm_protocol;
 1813:     s->krt_metric = priority;
 1814:   }
 1815:   else
 1816:   {
 1817:     /* Merge next hops with the stored route */
 1818:     rta *oa = s->attrs;
 1819: 
 1820:     struct nexthop *nhs = &oa->nh;
 1821:     nexthop_insert(&nhs, &ra->nh);
 1822: 
 1823:     /* Perhaps new nexthop is inserted at the first position */
 1824:     if (nhs == &ra->nh)
 1825:     {
 1826:       /* Swap rtas */
 1827:       s->attrs = ra;
 1828: 
 1829:       /* Keep old eattrs */
 1830:       ra->eattrs = oa->eattrs;
 1831:     }
 1832:   }
 1833: }
 1834: 
 1835: void
 1836: krt_do_scan(struct krt_proto *p UNUSED)	/* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
 1837: {
 1838:   struct nlmsghdr *h;
 1839:   struct nl_parse_state s;
 1840: 
 1841:   nl_parse_begin(&s, 1);
 1842:   nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
 1843:   while (h = nl_get_scan())
 1844:     if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
 1845:       nl_parse_route(&s, h);
 1846:     else
 1847:       log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
 1848:   nl_parse_end(&s);
 1849: }
 1850: 
 1851: /*
 1852:  *	Asynchronous Netlink interface
 1853:  */
 1854: 
/* BIRD socket for asynchronous notifications; NULL until nl_open_async() creates it */
static sock *nl_async_sk;
/* Receive buffer of NL_RX_SIZE bytes, allocated in nl_open_async() */
static byte *nl_async_rx_buffer;
 1857: 
 1858: static void
 1859: nl_async_msg(struct nlmsghdr *h)
 1860: {
 1861:   struct nl_parse_state s;
 1862: 
 1863:   switch (h->nlmsg_type)
 1864:     {
 1865:     case RTM_NEWROUTE:
 1866:     case RTM_DELROUTE:
 1867:       DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
 1868:       nl_parse_begin(&s, 0);
 1869:       nl_parse_route(&s, h);
 1870:       nl_parse_end(&s);
 1871:       break;
 1872:     case RTM_NEWLINK:
 1873:     case RTM_DELLINK:
 1874:       DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
 1875:       if (kif_proto)
 1876: 	nl_parse_link(h, 0);
 1877:       break;
 1878:     case RTM_NEWADDR:
 1879:     case RTM_DELADDR:
 1880:       DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
 1881:       if (kif_proto)
 1882: 	nl_parse_addr(h, 0);
 1883:       break;
 1884:     default:
 1885:       DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
 1886:     }
 1887: }
 1888: 
 1889: static int
 1890: nl_async_hook(sock *sk, uint size UNUSED)
 1891: {
 1892:   struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
 1893:   struct sockaddr_nl sa;
 1894:   struct msghdr m = {
 1895:     .msg_name = &sa,
 1896:     .msg_namelen = sizeof(sa),
 1897:     .msg_iov = &iov,
 1898:     .msg_iovlen = 1,
 1899:   };
 1900:   struct nlmsghdr *h;
 1901:   int x;
 1902:   uint len;
 1903: 
 1904:   x = recvmsg(sk->fd, &m, 0);
 1905:   if (x < 0)
 1906:     {
 1907:       if (errno == ENOBUFS)
 1908: 	{
 1909: 	  /*
 1910: 	   *  Netlink reports some packets have been thrown away.
 1911: 	   *  One day we might react to it by asking for route table
 1912: 	   *  scan in near future.
 1913: 	   */
 1914: 	  log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
 1915: 	  return 1;	/* More data are likely to be ready */
 1916: 	}
 1917:       else if (errno != EWOULDBLOCK)
 1918: 	log(L_ERR "Netlink recvmsg: %m");
 1919:       return 0;
 1920:     }
 1921:   if (sa.nl_pid)		/* It isn't from the kernel */
 1922:     {
 1923:       DBG("Non-kernel packet\n");
 1924:       return 1;
 1925:     }
 1926:   h = (void *) nl_async_rx_buffer;
 1927:   len = x;
 1928:   if (m.msg_flags & MSG_TRUNC)
 1929:     {
 1930:       log(L_WARN "Netlink got truncated asynchronous message");
 1931:       return 1;
 1932:     }
 1933:   while (NLMSG_OK(h, len))
 1934:     {
 1935:       nl_async_msg(h);
 1936:       h = NLMSG_NEXT(h, len);
 1937:     }
 1938:   if (len)
 1939:     log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
 1940:   return 1;
 1941: }
 1942: 
 1943: static void
 1944: nl_async_err_hook(sock *sk, int e UNUSED)
 1945: {
 1946:   nl_async_hook(sk, 0);
 1947: }
 1948: 
 1949: static void
 1950: nl_open_async(void)
 1951: {
 1952:   sock *sk;
 1953:   struct sockaddr_nl sa;
 1954:   int fd;
 1955: 
 1956:   if (nl_async_sk)
 1957:     return;
 1958: 
 1959:   DBG("KRT: Opening async netlink socket\n");
 1960: 
 1961:   fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 1962:   if (fd < 0)
 1963:     {
 1964:       log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
 1965:       return;
 1966:     }
 1967: 
 1968:   bzero(&sa, sizeof(sa));
 1969:   sa.nl_family = AF_NETLINK;
 1970:   sa.nl_groups = RTMGRP_LINK |
 1971:     RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
 1972:     RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
 1973: 
 1974:   if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
 1975:     {
 1976:       log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
 1977:       close(fd);
 1978:       return;
 1979:     }
 1980: 
 1981:   nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
 1982: 
 1983:   sk = nl_async_sk = sk_new(krt_pool);
 1984:   sk->type = SK_MAGIC;
 1985:   sk->rx_hook = nl_async_hook;
 1986:   sk->err_hook = nl_async_err_hook;
 1987:   sk->fd = fd;
 1988:   if (sk_open(sk) < 0)
 1989:     bug("Netlink: sk_open failed");
 1990: }
 1991: 
 1992: 
 1993: /*
 1994:  *	Interface to the UNIX krt module
 1995:  */
 1996: 
/*
 * One-time initialization of this backend: create the linpool used for
 * parsing (nl_linpool) and the table map hash, both allocated from krt_pool.
 */
void
krt_sys_io_init(void)
{
  nl_linpool = lp_new_default(krt_pool);
  HASH_INIT(nl_table_map, krt_pool, 6);
}
 2003: 
 2004: int
 2005: krt_sys_start(struct krt_proto *p)
 2006: {
 2007:   struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
 2008: 
 2009:   if (old)
 2010:     {
 2011:       log(L_ERR "%s: Kernel table %u already registered by %s",
 2012: 	  p->p.name, krt_table_id(p), old->p.name);
 2013:       return 0;
 2014:     }
 2015: 
 2016:   HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
 2017: 
 2018:   nl_open();
 2019:   nl_open_async();
 2020: 
 2021:   return 1;
 2022: }
 2023: 
/* Unregister instance @p from nl_table_map; the netlink sockets are left open. */
void
krt_sys_shutdown(struct krt_proto *p)
{
  HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
}
 2029: 
 2030: int
 2031: krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
 2032: {
 2033:   return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
 2034: }
 2035: 
/* Fill in defaults for the system-dependent options: table RT_TABLE_MAIN, metric 32. */
void
krt_sys_init_config(struct krt_config *cf)
{
  cf->sys.table_id = RT_TABLE_MAIN;
  cf->sys.metric = 32;
}
 2042: 
/* Copy the system-dependent options (table id and metric) from @s to @d. */
void
krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
{
  d->sys.table_id = s->sys.table_id;
  d->sys.metric = s->sys.metric;
}
 2049: 
/*
 * Printable names of kernel route metrics, indexed by metric id (attribute
 * id minus KRT_METRICS_OFFSET, see krt_sys_get_attr()). Slot 0 has no name.
 * The table also serves as the bit-name table when formatting the "lock"
 * attribute.
 */
static const char *krt_metrics_names[KRT_METRICS_MAX] = {
  NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
  "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
};

/*
 * Bit-name table used when formatting the "features" attribute; presumably
 * indexed by bit position, with NULL for bits that have no name - confirm
 * against ea_format_bitfield().
 */
static const char *krt_features_names[KRT_FEATURES_MAX] = {
  "ecn", NULL, NULL, "allfrag"
};
 2058: 
 2059: int
 2060: krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
 2061: {
 2062:   switch (a->id)
 2063:   {
 2064:   case EA_KRT_PREFSRC:
 2065:     bsprintf(buf, "prefsrc");
 2066:     return GA_NAME;
 2067: 
 2068:   case EA_KRT_REALM:
 2069:     bsprintf(buf, "realm");
 2070:     return GA_NAME;
 2071: 
 2072:   case EA_KRT_SCOPE:
 2073:     bsprintf(buf, "scope");
 2074:     return GA_NAME;
 2075: 
 2076:   case EA_KRT_LOCK:
 2077:     buf += bsprintf(buf, "lock:");
 2078:     ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
 2079:     return GA_FULL;
 2080: 
 2081:   case EA_KRT_FEATURES:
 2082:     buf += bsprintf(buf, "features:");
 2083:     ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
 2084:     return GA_FULL;
 2085: 
 2086:   default:;
 2087:     int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
 2088:     if (id > 0 && id < KRT_METRICS_MAX)
 2089:     {
 2090:       bsprintf(buf, "%s", krt_metrics_names[id]);
 2091:       return GA_NAME;
 2092:     }
 2093: 
 2094:     return GA_UNKNOWN;
 2095:   }
 2096: }
 2097: 
 2098: 
 2099: 
/* Start of the interface-scanning protocol: open the netlink sockets (the async open is a no-op when the socket already exists). */
void
kif_sys_start(struct kif_proto *p UNUSED)
{
  nl_open();
  nl_open_async();
}
 2106: 
/* Shutdown of the interface-scanning protocol: nothing to do in this backend. */
void
kif_sys_shutdown(struct kif_proto *p UNUSED)
{
}
 2111: 
/* System-dependent address update hook: this backend does nothing and always returns 0. */
int
kif_update_sysdep_addr(struct iface *i UNUSED)
{
  return 0;
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>