Annotation of embedaddon/bird2/sysdep/linux/netlink.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  *     BIRD -- Linux Netlink Interface
                      3:  *
                      4:  *     (c) 1999--2000 Martin Mares <mj@ucw.cz>
                      5:  *
                      6:  *     Can be freely distributed and used under the terms of the GNU GPL.
                      7:  */
                      8: 
                      9: #include <alloca.h>
                     10: #include <stdio.h>
                     11: #include <unistd.h>
                     12: #include <fcntl.h>
                     13: #include <sys/socket.h>
                     14: #include <sys/uio.h>
                     15: #include <errno.h>
                     16: 
                     17: #undef LOCAL_DEBUG
                     18: 
                     19: #include "nest/bird.h"
                     20: #include "nest/route.h"
                     21: #include "nest/protocol.h"
                     22: #include "nest/iface.h"
                     23: #include "lib/alloca.h"
                     24: #include "sysdep/unix/unix.h"
                     25: #include "sysdep/unix/krt.h"
                     26: #include "lib/socket.h"
                     27: #include "lib/string.h"
                     28: #include "lib/hash.h"
                     29: #include "conf/conf.h"
                     30: 
                     31: #include <asm/types.h>
                     32: #include <linux/if.h>
                     33: #include <linux/netlink.h>
                     34: #include <linux/rtnetlink.h>
                     35: 
                     36: #ifdef HAVE_MPLS_KERNEL
                     37: #include <linux/lwtunnel.h>
                     38: #endif
                     39: 
                     /*
                      * Compatibility fallbacks: every constant below is defined here only when
                      * the system headers did not already provide a macro of that name.
                      */

                     /* Hack: Several versions of glibc miss this one :( */
                     #if !defined(MSG_TRUNC)
                     #define MSG_TRUNC 0x20
                     #endif

                     #if !defined(IFA_FLAGS)
                     #define IFA_FLAGS 8
                     #endif

                     #if !defined(IFF_LOWER_UP)
                     #define IFF_LOWER_UP 0x10000
                     #endif

                     #if !defined(RTA_TABLE)
                     #define RTA_TABLE 15
                     #endif

                     #if !defined(RTA_VIA)
                     #define RTA_VIA 18
                     #endif

                     #if !defined(RTA_NEWDST)
                     #define RTA_NEWDST 19
                     #endif

                     #if !defined(RTA_ENCAP_TYPE)
                     #define RTA_ENCAP_TYPE 21
                     #endif

                     #if !defined(RTA_ENCAP)
                     #define RTA_ENCAP 22
                     #endif
                     71: 
                     /* Address-family predicates; (p) must point to something with an `af` member */
                     #define krt_ipv4(p)  (AF_INET == (p)->af)
                     #define krt_ecmp6(p) (AF_INET6 == (p)->af)

                     /* Exported constant; its consumers are outside this part of the file */
                     const int rt_default_ecmp = 16;
                     76: 
                     77: /*
                     78:  * Structure nl_parse_state keeps state of received route processing. Ideally,
                     79:  * we could just independently parse received Netlink messages and immediately
                     80:  * propagate received routes to the rest of BIRD, but older Linux kernels (before
                     81:  * version 4.11) represent and announce IPv6 ECMP routes not as one route with
                     82:  * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
                     83:  * routes with the same prefix. More recent kernels work as with IPv4.
                     84:  *
                     85:  * Therefore, BIRD keeps currently processed route in nl_parse_state structure
                     86:  * and postpones its propagation until we expect it to be final; i.e., when
                     87:  * non-matching route is received or when the scan ends. When another matching
                     88:  * route is received, it is merged with the already processed route to form an
                     89:  * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
                     90:  * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
                     91:  * routes with RTA_MULTIPATH set are just considered non-matching.
                     92:  *
                     93:  * This is ignored for asynchronous notifications (every notification is handled
                     94:  * as a separate route). It is not an issue for our routes, as we ignore such
                     95:  * notifications anyways. But importing alien IPv6 ECMP routes does not work
                     96:  * properly with older kernels.
                     97:  *
                     98:  * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
                     99:  * for the same prefix.
                    100:  */
                    101: 
                    102: struct nl_parse_state
                    103: {
                    104:   struct linpool *pool;
                    105:   int scan;
                    106:   int merge;
                    107: 
                    108:   net *net;
                    109:   rta *attrs;
                    110:   struct krt_proto *proto;
                    111:   s8 new;
                    112:   s8 krt_src;
                    113:   u8 krt_type;
                    114:   u8 krt_proto;
                    115:   u32 krt_metric;
                    116: };
                    117: 
                    118: /*
                    119:  *     Synchronous Netlink interface
                    120:  */
                    121: 
                    122: struct nl_sock
                    123: {
                    124:   int fd;
                    125:   u32 seq;
                    126:   byte *rx_buffer;                     /* Receive buffer */
                    127:   struct nlmsghdr *last_hdr;           /* Recently received packet */
                    128:   uint last_size;
                    129: };
                    130: 
                    131: #define NL_RX_SIZE 8192
                    132: 
                    133: #define NL_OP_DELETE   0
                    134: #define NL_OP_ADD      (NLM_F_CREATE|NLM_F_EXCL)
                    135: #define NL_OP_REPLACE  (NLM_F_CREATE|NLM_F_REPLACE)
                    136: #define NL_OP_APPEND   (NLM_F_CREATE|NLM_F_APPEND)
                    137: 
                    138: static linpool *nl_linpool;
                    139: 
                    140: static struct nl_sock nl_scan = {.fd = -1};    /* Netlink socket for synchronous scan */
                    141: static struct nl_sock nl_req  = {.fd = -1};    /* Netlink socket for requests */
                    142: 
                    143: static void
                    144: nl_open_sock(struct nl_sock *nl)
                    145: {
                    146:   if (nl->fd < 0)
                    147:     {
                    148:       nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
                    149:       if (nl->fd < 0)
                    150:        die("Unable to open rtnetlink socket: %m");
                    151:       nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
                    152:       nl->rx_buffer = xmalloc(NL_RX_SIZE);
                    153:       nl->last_hdr = NULL;
                    154:       nl->last_size = 0;
                    155:     }
                    156: }
                    157: 
                    158: static void
                    159: nl_open(void)
                    160: {
                    161:   nl_open_sock(&nl_scan);
                    162:   nl_open_sock(&nl_req);
                    163: }
                    164: 
                    165: static void
                    166: nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
                    167: {
                    168:   struct sockaddr_nl sa;
                    169: 
                    170:   memset(&sa, 0, sizeof(sa));
                    171:   sa.nl_family = AF_NETLINK;
                    172:   nh->nlmsg_pid = 0;
                    173:   nh->nlmsg_seq = ++(nl->seq);
                    174:   if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
                    175:     die("rtnetlink sendto: %m");
                    176:   nl->last_hdr = NULL;
                    177: }
                    178: 
                    179: static void
                    180: nl_request_dump(int af, int cmd)
                    181: {
                    182:   struct {
                    183:     struct nlmsghdr nh;
                    184:     struct rtgenmsg g;
                    185:   } req = {
                    186:     .nh.nlmsg_type = cmd,
                    187:     .nh.nlmsg_len = sizeof(req),
                    188:     .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
                    189:     .g.rtgen_family = af
                    190:   };
                    191:   nl_send(&nl_scan, &req.nh);
                    192: }
                    193: 
                    194: static struct nlmsghdr *
                    195: nl_get_reply(struct nl_sock *nl)
                    196: {
                    197:   for(;;)
                    198:     {
                    199:       if (!nl->last_hdr)
                    200:        {
                    201:          struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
                    202:          struct sockaddr_nl sa;
                    203:          struct msghdr m = {
                    204:            .msg_name = &sa,
                    205:            .msg_namelen = sizeof(sa),
                    206:            .msg_iov = &iov,
                    207:            .msg_iovlen = 1,
                    208:          };
                    209:          int x = recvmsg(nl->fd, &m, 0);
                    210:          if (x < 0)
                    211:            die("nl_get_reply: %m");
                    212:          if (sa.nl_pid)                /* It isn't from the kernel */
                    213:            {
                    214:              DBG("Non-kernel packet\n");
                    215:              continue;
                    216:            }
                    217:          nl->last_size = x;
                    218:          nl->last_hdr = (void *) nl->rx_buffer;
                    219:          if (m.msg_flags & MSG_TRUNC)
                    220:            bug("nl_get_reply: got truncated reply which should be impossible");
                    221:        }
                    222:       if (NLMSG_OK(nl->last_hdr, nl->last_size))
                    223:        {
                    224:          struct nlmsghdr *h = nl->last_hdr;
                    225:          nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
                    226:          if (h->nlmsg_seq != nl->seq)
                    227:            {
                    228:              log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
                    229:                  h->nlmsg_seq, nl->seq);
                    230:              continue;
                    231:            }
                    232:          return h;
                    233:        }
                    234:       if (nl->last_size)
                    235:        log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
                    236:       nl->last_hdr = NULL;
                    237:     }
                    238: }
                    239: 
                    240: static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
                    241: 
                    242: static int
                    243: nl_error(struct nlmsghdr *h, int ignore_esrch)
                    244: {
                    245:   struct nlmsgerr *e;
                    246:   int ec;
                    247: 
                    248:   if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
                    249:     {
                    250:       log(L_WARN "Netlink: Truncated error message received");
                    251:       return ENOBUFS;
                    252:     }
                    253:   e = (struct nlmsgerr *) NLMSG_DATA(h);
                    254:   ec = -e->error;
                    255:   if (ec && !(ignore_esrch && (ec == ESRCH)))
                    256:     log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
                    257:   return ec;
                    258: }
                    259: 
                    260: static struct nlmsghdr *
                    261: nl_get_scan(void)
                    262: {
                    263:   struct nlmsghdr *h = nl_get_reply(&nl_scan);
                    264: 
                    265:   if (h->nlmsg_type == NLMSG_DONE)
                    266:     return NULL;
                    267:   if (h->nlmsg_type == NLMSG_ERROR)
                    268:     {
                    269:       nl_error(h, 0);
                    270:       return NULL;
                    271:     }
                    272:   return h;
                    273: }
                    274: 
                    275: static int
                    276: nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
                    277: {
                    278:   struct nlmsghdr *h;
                    279: 
                    280:   nl_send(&nl_req, pkt);
                    281:   for(;;)
                    282:     {
                    283:       h = nl_get_reply(&nl_req);
                    284:       if (h->nlmsg_type == NLMSG_ERROR)
                    285:        break;
                    286:       log(L_WARN "nl_exchange: Unexpected reply received");
                    287:     }
                    288:   return nl_error(h, ignore_esrch) ? -1 : 0;
                    289: }
                    290: 
                    291: /*
                    292:  *     Netlink attributes
                    293:  */
                    294: 
                    295: static int nl_attr_len;
                    296: 
                    297: static void *
                    298: nl_checkin(struct nlmsghdr *h, int lsize)
                    299: {
                    300:   nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
                    301:   if (nl_attr_len < 0)
                    302:     {
                    303:       log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
                    304:       return NULL;
                    305:     }
                    306:   return NLMSG_DATA(h);
                    307: }
                    308: 
                    309: struct nl_want_attrs {
                    310:   u8 defined:1;
                    311:   u8 checksize:1;
                    312:   u8 size;
                    313: };
                    314: 
                    315: 
                    316: #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
                    317: 
                    318: static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
                    319:   [IFLA_IFNAME]          = { 1, 0, 0 },
                    320:   [IFLA_MTU]     = { 1, 1, sizeof(u32) },
                    321:   [IFLA_MASTER]          = { 1, 1, sizeof(u32) },
                    322:   [IFLA_WIRELESS] = { 1, 0, 0 },
                    323: };
                    324: 
                    325: 
                    326: #define BIRD_IFA_MAX  (IFA_FLAGS+1)
                    327: 
                    328: static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
                    329:   [IFA_ADDRESS]          = { 1, 1, sizeof(ip4_addr) },
                    330:   [IFA_LOCAL]    = { 1, 1, sizeof(ip4_addr) },
                    331:   [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
                    332:   [IFA_FLAGS]     = { 1, 1, sizeof(u32) },
                    333: };
                    334: 
                    335: static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
                    336:   [IFA_ADDRESS]          = { 1, 1, sizeof(ip6_addr) },
                    337:   [IFA_LOCAL]    = { 1, 1, sizeof(ip6_addr) },
                    338:   [IFA_FLAGS]    = { 1, 1, sizeof(u32) },
                    339: };
                    340: 
                    341: 
                    342: #define BIRD_RTA_MAX  (RTA_ENCAP+1)
                    343: 
                    344: static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
                    345:   [RTA_GATEWAY]          = { 1, 1, sizeof(ip4_addr) },
                    346:   [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
                    347:   [RTA_ENCAP]    = { 1, 0, 0 },
                    348: };
                    349: 
                    350: static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
                    351:   [RTA_GATEWAY]          = { 1, 1, sizeof(ip6_addr) },
                    352:   [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
                    353:   [RTA_ENCAP]    = { 1, 0, 0 },
                    354: };
                    355: 
                    356: #ifdef HAVE_MPLS_KERNEL
                    357: static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
                    358:   [RTA_DST]       = { 1, 0, 0 },
                    359: };
                    360: #endif
                    361: 
                    362: static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
                    363:   [RTA_DST]      = { 1, 1, sizeof(ip4_addr) },
                    364:   [RTA_OIF]      = { 1, 1, sizeof(u32) },
                    365:   [RTA_GATEWAY]          = { 1, 1, sizeof(ip4_addr) },
                    366:   [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
                    367:   [RTA_PREFSRC]          = { 1, 1, sizeof(ip4_addr) },
                    368:   [RTA_METRICS]          = { 1, 0, 0 },
                    369:   [RTA_MULTIPATH] = { 1, 0, 0 },
                    370:   [RTA_FLOW]     = { 1, 1, sizeof(u32) },
                    371:   [RTA_TABLE]    = { 1, 1, sizeof(u32) },
                    372:   [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
                    373:   [RTA_ENCAP]    = { 1, 0, 0 },
                    374: };
                    375: 
                    376: static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
                    377:   [RTA_DST]      = { 1, 1, sizeof(ip6_addr) },
                    378:   [RTA_SRC]      = { 1, 1, sizeof(ip6_addr) },
                    379:   [RTA_IIF]      = { 1, 1, sizeof(u32) },
                    380:   [RTA_OIF]      = { 1, 1, sizeof(u32) },
                    381:   [RTA_GATEWAY]          = { 1, 1, sizeof(ip6_addr) },
                    382:   [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
                    383:   [RTA_PREFSRC]          = { 1, 1, sizeof(ip6_addr) },
                    384:   [RTA_METRICS]          = { 1, 0, 0 },
                    385:   [RTA_MULTIPATH] = { 1, 0, 0 },
                    386:   [RTA_FLOW]     = { 1, 1, sizeof(u32) },
                    387:   [RTA_TABLE]    = { 1, 1, sizeof(u32) },
                    388:   [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
                    389:   [RTA_ENCAP]    = { 1, 0, 0 },
                    390: };
                    391: 
                    392: #ifdef HAVE_MPLS_KERNEL
                    393: static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
                    394:   [RTA_DST]      = { 1, 1, sizeof(u32) },
                    395:   [RTA_IIF]      = { 1, 1, sizeof(u32) },
                    396:   [RTA_OIF]      = { 1, 1, sizeof(u32) },
                    397:   [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
                    398:   [RTA_METRICS]          = { 1, 0, 0 },
                    399:   [RTA_FLOW]     = { 1, 1, sizeof(u32) },
                    400:   [RTA_TABLE]    = { 1, 1, sizeof(u32) },
                    401:   [RTA_VIA]      = { 1, 0, 0 },
                    402:   [RTA_NEWDST]   = { 1, 0, 0 },
                    403: };
                    404: #endif
                    405: 
                    406: 
                    407: static int
                    408: nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
                    409: {
                    410:   int max = ksize / sizeof(struct rtattr *);
                    411:   bzero(k, ksize);
                    412: 
                    413:   for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
                    414:     {
                    415:       if ((a->rta_type >= max) || !want[a->rta_type].defined)
                    416:        continue;
                    417: 
                    418:       if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
                    419:        {
                    420:          log(L_ERR "nl_parse_attrs: Malformed attribute received");
                    421:          return 0;
                    422:        }
                    423: 
                    424:       k[a->rta_type] = a;
                    425:     }
                    426: 
                    427:   if (nl_attr_len)
                    428:     {
                    429:       log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
                    430:       return 0;
                    431:     }
                    432: 
                    433:   return 1;
                    434: }
                    435: 
                    436: static inline u16 rta_get_u16(struct rtattr *a)
                    437: { return *(u16 *) RTA_DATA(a); }
                    438: 
                    439: static inline u32 rta_get_u32(struct rtattr *a)
                    440: { return *(u32 *) RTA_DATA(a); }
                    441: 
                    442: static inline ip4_addr rta_get_ip4(struct rtattr *a)
                    443: { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
                    444: 
                    445: static inline ip6_addr rta_get_ip6(struct rtattr *a)
                    446: { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
                    447: 
                    448: static inline ip_addr rta_get_ipa(struct rtattr *a)
                    449: {
                    450:   if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
                    451:     return ipa_from_ip4(rta_get_ip4(a));
                    452:   else
                    453:     return ipa_from_ip6(rta_get_ip6(a));
                    454: }
                    455: 
                    #ifdef HAVE_MPLS_KERNEL
                    /*
                     * Decode the address stored in an RTA_VIA attribute.
                     *
                     * Returns IPA_NONE when the address family is neither AF_INET nor AF_INET6,
                     * or when the payload is too short to hold struct rtvia followed by an
                     * address of the announced family. The table for MPLS routes does not
                     * size-check RTA_VIA, so the length has to be verified here before the
                     * address bytes are read.
                     */
                    static inline ip_addr rta_get_via(struct rtattr *a)
                    {
                      struct rtvia *v = RTA_DATA(a);
                      size_t len = RTA_PAYLOAD(a);

                      if (len < sizeof(struct rtvia))
                        return IPA_NONE;

                      /* Bytes available for the address that follows the family field */
                      size_t alen = len - sizeof(struct rtvia);

                      /*
                       * rtvia_addr follows a 2-byte family field, so it need not be aligned
                       * for ip4_addr / ip6_addr; copy it out instead of dereferencing a cast
                       * pointer.
                       */
                      switch (v->rtvia_family) {
                        case AF_INET:
                          if (alen >= sizeof(ip4_addr))
                          {
                            ip4_addr a4;
                            memcpy(&a4, v->rtvia_addr, sizeof(a4));
                            return ipa_from_ip4(ip4_ntoh(a4));
                          }
                          break;

                        case AF_INET6:
                          if (alen >= sizeof(ip6_addr))
                          {
                            ip6_addr a6;
                            memcpy(&a6, v->rtvia_addr, sizeof(a6));
                            return ipa_from_ip6(ip6_ntoh(a6));
                          }
                          break;
                      }

                      return IPA_NONE;
                    }

                    /* File-scope label stack buffer; its users are outside this block */
                    static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];

                    /*
                     * Decode the MPLS label stack carried by attribute @a into @stack.
                     *
                     * Returns the number of labels as reported by mpls_get(), or 0 when @a is
                     * NULL or mpls_get() reports failure (negative result). A payload length
                     * that is not a multiple of 4 is logged and the trailing bytes are ignored.
                     */
                    static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
                    {
                      if (!a)
                        return 0;

                      /* RTA_PAYLOAD() is not an unsigned int expression; cast it to match %u */
                      if (RTA_PAYLOAD(a) % 4)
                        log(L_WARN "KRT: Strange length of received MPLS stack: %u", (uint) RTA_PAYLOAD(a));

                      int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);

                      if (labels < 0)
                      {
                        log(L_WARN "KRT: Too long MPLS stack received, ignoring");
                        labels = 0;
                      }

                      return labels;
                    }
                    #endif
                    487: 
                    488: struct rtattr *
                    489: nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
                    490: {
                    491:   uint pos = NLMSG_ALIGN(h->nlmsg_len);
                    492:   uint len = RTA_LENGTH(dlen);
                    493: 
                    494:   if (pos + len > bufsize)
                    495:     bug("nl_add_attr: packet buffer overflow");
                    496: 
                    497:   struct rtattr *a = (struct rtattr *)((char *)h + pos);
                    498:   a->rta_type = code;
                    499:   a->rta_len = len;
                    500:   h->nlmsg_len = pos + len;
                    501: 
                    502:   if (dlen > 0)
                    503:     memcpy(RTA_DATA(a), data, dlen);
                    504: 
                    505:   return a;
                    506: }
                    507: 
                    508: static inline struct rtattr *
                    509: nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
                    510: {
                    511:   return nl_add_attr(h, bufsize, code, NULL, 0);
                    512: }
                    513: 
                    514: static inline void
                    515: nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
                    516: {
                    517:   a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
                    518: }
                    519: 
                    520: static inline void
                    521: nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
                    522: {
                    523:   nl_add_attr(h, bufsize, code, &data, 2);
                    524: }
                    525: 
                    526: static inline void
                    527: nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
                    528: {
                    529:   nl_add_attr(h, bufsize, code, &data, 4);
                    530: }
                    531: 
                    532: static inline void
                    533: nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
                    534: {
                    535:   ip4 = ip4_hton(ip4);
                    536:   nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
                    537: }
                    538: 
                    539: static inline void
                    540: nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
                    541: {
                    542:   ip6 = ip6_hton(ip6);
                    543:   nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
                    544: }
                    545: 
                    546: static inline void
                    547: nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
                    548: {
                    549:   if (ipa_is_ip4(ipa))
                    550:     nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
                    551:   else
                    552:     nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
                    553: }
                    554: 
                    #ifdef HAVE_MPLS_KERNEL
                    /*
                     * Append attribute @code carrying @len MPLS labels from @stack, encoded by
                     * mpls_put() at 4 bytes per label.
                     */
                    static inline void
                    nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
                    {
                      char wire[len*4];

                      mpls_put(wire, len, stack);
                      nl_add_attr(h, bufsize, code, wire, len*4);
                    }

                    /*
                     * Append an MPLS encapsulation: an RTA_ENCAP_TYPE attribute followed by a
                     * nested RTA_ENCAP attribute that contains the label stack as RTA_DST.
                     */
                    static inline void
                    nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
                    {
                      struct rtattr *encap;

                      nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);

                      encap = nl_open_attr(h, bufsize, RTA_ENCAP);
                      nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
                      nl_close_attr(h, encap);
                    }

                    /*
                     * Append an RTA_VIA attribute for @ipa: a struct rtvia with the address
                     * family followed by 4 (IPv4) or 16 (IPv6) address bytes.
                     */
                    static inline void
                    nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
                    {
                      /* Room for the larger (IPv6) variant */
                      struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
                      uint addr_len;

                      if (ipa_is_ip4(ipa))
                      {
                        via->rtvia_family = AF_INET;
                        put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
                        addr_len = 4;
                      }
                      else
                      {
                        via->rtvia_family = AF_INET6;
                        put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
                        addr_len = 16;
                      }

                      nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + addr_len);
                    }
                    #endif
                    593: 
                    594: static inline struct rtnexthop *
                    595: nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
                    596: {
                    597:   uint pos = NLMSG_ALIGN(h->nlmsg_len);
                    598:   uint len = RTNH_LENGTH(0);
                    599: 
                    600:   if (pos + len > bufsize)
                    601:     bug("nl_open_nexthop: packet buffer overflow");
                    602: 
                    603:   h->nlmsg_len = pos + len;
                    604: 
                    605:   return (void *)h + pos;
                    606: }
                    607: 
                    608: static inline void
                    609: nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
                    610: {
                    611:   nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
                    612: }
                    613: 
                    614: static inline void
                    615: nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
                    616: {
                    617: #ifdef HAVE_MPLS_KERNEL
                    618:   if (nh->labels > 0)
                    619:     if (af == AF_MPLS)
                    620:       nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
                    621:     else
                    622:       nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
                    623: 
                    624:   if (ipa_nonzero(nh->gw))
                    625:     if (af == AF_MPLS)
                    626:       nl_add_attr_via(h, bufsize, nh->gw);
                    627:     else
                    628:       nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
                    629: #else
                    630: 
                    631:   if (ipa_nonzero(nh->gw))
                    632:     nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
                    633: #endif
                    634: }
                    635: 
                    636: static void
                    637: nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
                    638: {
                    639:   struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
                    640: 
                    641:   for (; nh; nh = nh->next)
                    642:   {
                    643:     struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
                    644: 
                    645:     rtnh->rtnh_flags = 0;
                    646:     rtnh->rtnh_hops = nh->weight;
                    647:     rtnh->rtnh_ifindex = nh->iface->index;
                    648: 
                    649:     nl_add_nexthop(h, bufsize, nh, af);
                    650: 
                    651:     if (nh->flags & RNF_ONLINK)
                    652:       rtnh->rtnh_flags |= RTNH_F_ONLINK;
                    653: 
                    654:     nl_close_nexthop(h, rtnh);
                    655:   }
                    656: 
                    657:   nl_close_attr(h, a);
                    658: }
                    659: 
                    660: static struct nexthop *
                    661: nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
                    662: {
                    663:   struct rtattr *a[BIRD_RTA_MAX];
                    664:   struct rtnexthop *nh = RTA_DATA(ra);
                    665:   struct nexthop *rv, *first, **last;
                    666:   unsigned len = RTA_PAYLOAD(ra);
                    667: 
                    668:   first = NULL;
                    669:   last = &first;
                    670: 
                    671:   while (len)
                    672:     {
                    673:       /* Use RTNH_OK(nh,len) ?? */
                    674:       if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
                    675:        return NULL;
                    676: 
                    677:       *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
                    678:       last = &(rv->next);
                    679: 
                    680:       rv->weight = nh->rtnh_hops;
                    681:       rv->iface = if_find_by_index(nh->rtnh_ifindex);
                    682:       if (!rv->iface)
                    683:        return NULL;
                    684: 
                    685:       /* Nonexistent RTNH_PAYLOAD ?? */
                    686:       nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
                    687:       switch (af)
                    688:         {
                    689:        case AF_INET:
                    690:          if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
                    691:            return NULL;
                    692:          break;
                    693: 
                    694:        case AF_INET6:
                    695:          if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
                    696:            return NULL;
                    697:          break;
                    698: 
                    699:        default:
                    700:          return NULL;
                    701:        }
                    702: 
                    703:       if (a[RTA_GATEWAY])
                    704:        {
                    705:          rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
                    706: 
                    707:          if (nh->rtnh_flags & RTNH_F_ONLINK)
                    708:            rv->flags |= RNF_ONLINK;
                    709: 
                    710:          neighbor *nbr;
                    711:          nbr = neigh_find(&p->p, rv->gw, rv->iface,
                    712:                           (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
                    713:          if (!nbr || (nbr->scope == SCOPE_HOST))
                    714:            return NULL;
                    715:        }
                    716:       else
                    717:        rv->gw = IPA_NONE;
                    718: 
                    719: #ifdef HAVE_MPLS_KERNEL
                    720:       if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
                    721:       {
                    722:        if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
                    723:          log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
                    724:          return NULL;
                    725:        }
                    726: 
                    727:        struct rtattr *enca[BIRD_RTA_MAX];
                    728:        nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
                    729:        nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
                    730:        rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
                    731:       }
                    732: #endif
                    733: 
                    734: 
                    735:       len -= NLMSG_ALIGN(nh->rtnh_len);
                    736:       nh = RTNH_NEXT(nh);
                    737:     }
                    738: 
                    739:   /* Ensure nexthops are sorted to satisfy nest invariant */
                    740:   if (!nexthop_is_sorted(first))
                    741:     first = nexthop_sort(first);
                    742: 
                    743:   return first;
                    744: }
                    745: 
                    746: static void
                    747: nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
                    748: {
                    749:   struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
                    750:   int t;
                    751: 
                    752:   for (t = 1; t < max; t++)
                    753:     if (metrics[0] & (1 << t))
                    754:       nl_add_attr_u32(h, bufsize, t, metrics[t]);
                    755: 
                    756:   nl_close_attr(h, a);
                    757: }
                    758: 
                    759: static int
                    760: nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
                    761: {
                    762:   struct rtattr *a = RTA_DATA(hdr);
                    763:   int len = RTA_PAYLOAD(hdr);
                    764: 
                    765:   metrics[0] = 0;
                    766:   for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
                    767:   {
                    768:     if (a->rta_type == RTA_UNSPEC)
                    769:       continue;
                    770: 
                    771:     if (a->rta_type >= max)
                    772:       continue;
                    773: 
                    774:     if (RTA_PAYLOAD(a) != 4)
                    775:       return -1;
                    776: 
                    777:     metrics[0] |= 1 << a->rta_type;
                    778:     metrics[a->rta_type] = rta_get_u32(a);
                    779:   }
                    780: 
                    781:   if (len > 0)
                    782:     return -1;
                    783: 
                    784:   return 0;
                    785: }
                    786: 
                    787: 
                    788: /*
                    789:  *     Scanning of interfaces
                    790:  */
                    791: 
                    792: static void
                    793: nl_parse_link(struct nlmsghdr *h, int scan)
                    794: {
                    795:   struct ifinfomsg *i;
                    796:   struct rtattr *a[BIRD_IFLA_MAX];
                    797:   int new = h->nlmsg_type == RTM_NEWLINK;
                    798:   struct iface f = {};
                    799:   struct iface *ifi;
                    800:   char *name;
                    801:   u32 mtu, master = 0;
                    802:   uint fl;
                    803: 
                    804:   if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
                    805:     return;
                    806:   if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
                    807:     {
                    808:       /*
                    809:        * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
                    810:        * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
                    811:        * We simply ignore all such messages with IFLA_WIRELESS without notice.
                    812:        */
                    813: 
                    814:       if (a[IFLA_WIRELESS])
                    815:        return;
                    816: 
                    817:       log(L_ERR "KIF: Malformed message received");
                    818:       return;
                    819:     }
                    820: 
                    821:   name = RTA_DATA(a[IFLA_IFNAME]);
                    822:   mtu = rta_get_u32(a[IFLA_MTU]);
                    823: 
                    824:   if (a[IFLA_MASTER])
                    825:     master = rta_get_u32(a[IFLA_MASTER]);
                    826: 
                    827:   ifi = if_find_by_index(i->ifi_index);
                    828:   if (!new)
                    829:     {
                    830:       DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
                    831:       if (!ifi)
                    832:        return;
                    833: 
                    834:       if_delete(ifi);
                    835:     }
                    836:   else
                    837:     {
                    838:       DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
                    839:       if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
                    840:        if_delete(ifi);
                    841: 
                    842:       strncpy(f.name, name, sizeof(f.name)-1);
                    843:       f.index = i->ifi_index;
                    844:       f.mtu = mtu;
                    845: 
                    846:       f.master_index = master;
                    847:       f.master = if_find_by_index(master);
                    848: 
                    849:       fl = i->ifi_flags;
                    850:       if (fl & IFF_UP)
                    851:        f.flags |= IF_ADMIN_UP;
                    852:       if (fl & IFF_LOWER_UP)
                    853:        f.flags |= IF_LINK_UP;
                    854:       if (fl & IFF_LOOPBACK)           /* Loopback */
                    855:        f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
                    856:       else if (fl & IFF_POINTOPOINT)   /* PtP */
                    857:        f.flags |= IF_MULTICAST;
                    858:       else if (fl & IFF_BROADCAST)     /* Broadcast */
                    859:        f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
                    860:       else
                    861:        f.flags |= IF_MULTIACCESS;      /* NBMA */
                    862: 
                    863:       if (fl & IFF_MULTICAST)
                    864:        f.flags |= IF_MULTICAST;
                    865: 
                    866:       ifi = if_update(&f);
                    867: 
                    868:       if (!scan)
                    869:        if_end_partial_update(ifi);
                    870:     }
                    871: }
                    872: 
                    873: static void
                    874: nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
                    875: {
                    876:   struct rtattr *a[BIRD_IFA_MAX];
                    877:   struct iface *ifi;
                    878:   u32 ifa_flags;
                    879:   int scope;
                    880: 
                    881:   if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
                    882:     return;
                    883: 
                    884:   if (!a[IFA_LOCAL])
                    885:     {
                    886:       log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
                    887:       return;
                    888:     }
                    889:   if (!a[IFA_ADDRESS])
                    890:     {
                    891:       log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
                    892:       return;
                    893:     }
                    894: 
                    895:   ifi = if_find_by_index(i->ifa_index);
                    896:   if (!ifi)
                    897:     {
                    898:       log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
                    899:       return;
                    900:     }
                    901: 
                    902:   if (a[IFA_FLAGS])
                    903:     ifa_flags = rta_get_u32(a[IFA_FLAGS]);
                    904:   else
                    905:     ifa_flags = i->ifa_flags;
                    906: 
                    907:   struct ifa ifa;
                    908:   bzero(&ifa, sizeof(ifa));
                    909:   ifa.iface = ifi;
                    910:   if (ifa_flags & IFA_F_SECONDARY)
                    911:     ifa.flags |= IA_SECONDARY;
                    912: 
                    913:   ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
                    914: 
                    915:   if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
                    916:     {
                    917:       log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
                    918:       new = 0;
                    919:     }
                    920:   if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
                    921:     {
                    922:       ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
                    923:       net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
                    924: 
                    925:       /* It is either a host address or a peer address */
                    926:       if (ipa_equal(ifa.ip, ifa.brd))
                    927:        ifa.flags |= IA_HOST;
                    928:       else
                    929:        {
                    930:          ifa.flags |= IA_PEER;
                    931:          ifa.opposite = ifa.brd;
                    932:        }
                    933:     }
                    934:   else
                    935:     {
                    936:       net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
                    937:       net_normalize(&ifa.prefix);
                    938: 
                    939:       if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
                    940:        ifa.opposite = ipa_opposite_m1(ifa.ip);
                    941: 
                    942:       if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
                    943:        ifa.opposite = ipa_opposite_m2(ifa.ip);
                    944: 
                    945:       if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
                    946:        {
                    947:          ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
                    948:          ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
                    949: 
                    950:          if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
                    951:            ifa.brd = ipa_from_ip4(xbrd);
                    952:          else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
                    953:            {
                    954:              log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
                    955:              ifa.brd = ipa_from_ip4(ybrd);
                    956:            }
                    957:        }
                    958:     }
                    959: 
                    960:   scope = ipa_classify(ifa.ip);
                    961:   if (scope < 0)
                    962:     {
                    963:       log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
                    964:       return;
                    965:     }
                    966:   ifa.scope = scope & IADDR_SCOPE_MASK;
                    967: 
                    968:   DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
                    969:       ifi->index, ifi->name,
                    970:       new ? "added" : "removed",
                    971:       ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
                    972: 
                    973:   if (new)
                    974:     ifa_update(&ifa);
                    975:   else
                    976:     ifa_delete(&ifa);
                    977: 
                    978:   if (!scan)
                    979:     if_end_partial_update(ifi);
                    980: }
                    981: 
                    982: static void
                    983: nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
                    984: {
                    985:   struct rtattr *a[BIRD_IFA_MAX];
                    986:   struct iface *ifi;
                    987:   u32 ifa_flags;
                    988:   int scope;
                    989: 
                    990:   if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
                    991:     return;
                    992: 
                    993:   if (!a[IFA_ADDRESS])
                    994:     {
                    995:       log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
                    996:       return;
                    997:     }
                    998: 
                    999:   ifi = if_find_by_index(i->ifa_index);
                   1000:   if (!ifi)
                   1001:     {
                   1002:       log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
                   1003:       return;
                   1004:     }
                   1005: 
                   1006:   if (a[IFA_FLAGS])
                   1007:     ifa_flags = rta_get_u32(a[IFA_FLAGS]);
                   1008:   else
                   1009:     ifa_flags = i->ifa_flags;
                   1010: 
                   1011:   struct ifa ifa;
                   1012:   bzero(&ifa, sizeof(ifa));
                   1013:   ifa.iface = ifi;
                   1014:   if (ifa_flags & IFA_F_SECONDARY)
                   1015:     ifa.flags |= IA_SECONDARY;
                   1016: 
                   1017:   /* Ignore tentative addresses silently */
                   1018:   if (ifa_flags & IFA_F_TENTATIVE)
                   1019:     return;
                   1020: 
                   1021:   /* IFA_LOCAL can be unset for IPv6 interfaces */
                   1022:   ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
                   1023: 
                   1024:   if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
                   1025:     {
                   1026:       log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
                   1027:       new = 0;
                   1028:     }
                   1029:   if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
                   1030:     {
                   1031:       ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
                   1032:       net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
                   1033: 
                   1034:       /* It is either a host address or a peer address */
                   1035:       if (ipa_equal(ifa.ip, ifa.brd))
                   1036:        ifa.flags |= IA_HOST;
                   1037:       else
                   1038:        {
                   1039:          ifa.flags |= IA_PEER;
                   1040:          ifa.opposite = ifa.brd;
                   1041:        }
                   1042:     }
                   1043:   else
                   1044:     {
                   1045:       net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
                   1046:       net_normalize(&ifa.prefix);
                   1047: 
                   1048:       if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
                   1049:        ifa.opposite = ipa_opposite_m1(ifa.ip);
                   1050:     }
                   1051: 
                   1052:   scope = ipa_classify(ifa.ip);
                   1053:   if (scope < 0)
                   1054:     {
                   1055:       log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
                   1056:       return;
                   1057:     }
                   1058:   ifa.scope = scope & IADDR_SCOPE_MASK;
                   1059: 
                   1060:   DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
                   1061:       ifi->index, ifi->name,
                   1062:       new ? "added" : "removed",
                   1063:       ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
                   1064: 
                   1065:   if (new)
                   1066:     ifa_update(&ifa);
                   1067:   else
                   1068:     ifa_delete(&ifa);
                   1069: 
                   1070:   if (!scan)
                   1071:     if_end_partial_update(ifi);
                   1072: }
                   1073: 
                   1074: static void
                   1075: nl_parse_addr(struct nlmsghdr *h, int scan)
                   1076: {
                   1077:   struct ifaddrmsg *i;
                   1078: 
                   1079:   if (!(i = nl_checkin(h, sizeof(*i))))
                   1080:     return;
                   1081: 
                   1082:   int new = (h->nlmsg_type == RTM_NEWADDR);
                   1083: 
                   1084:   switch (i->ifa_family)
                   1085:     {
                   1086:       case AF_INET:
                   1087:        return nl_parse_addr4(i, scan, new);
                   1088: 
                   1089:       case AF_INET6:
                   1090:        return nl_parse_addr6(i, scan, new);
                   1091:     }
                   1092: }
                   1093: 
                   1094: void
                   1095: kif_do_scan(struct kif_proto *p UNUSED)
                   1096: {
                   1097:   struct nlmsghdr *h;
                   1098: 
                   1099:   if_start_update();
                   1100: 
                   1101:   nl_request_dump(AF_UNSPEC, RTM_GETLINK);
                   1102:   while (h = nl_get_scan())
                   1103:     if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
                   1104:       nl_parse_link(h, 1);
                   1105:     else
                   1106:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
                   1107: 
                   1108:   /* Re-resolve master interface for slaves */
                   1109:   struct iface *i;
                   1110:   WALK_LIST(i, iface_list)
                   1111:     if (i->master_index)
                   1112:     {
                   1113:       struct iface f = {
                   1114:        .flags = i->flags,
                   1115:        .mtu = i->mtu,
                   1116:        .index = i->index,
                   1117:        .master_index = i->master_index,
                   1118:        .master = if_find_by_index(i->master_index)
                   1119:       };
                   1120: 
                   1121:       if (f.master != i->master)
                   1122:       {
                   1123:        memcpy(f.name, i->name, sizeof(f.name));
                   1124:        if_update(&f);
                   1125:       }
                   1126:     }
                   1127: 
                   1128:   nl_request_dump(AF_INET, RTM_GETADDR);
                   1129:   while (h = nl_get_scan())
                   1130:     if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
                   1131:       nl_parse_addr(h, 1);
                   1132:     else
                   1133:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
                   1134: 
                   1135:   nl_request_dump(AF_INET6, RTM_GETADDR);
                   1136:   while (h = nl_get_scan())
                   1137:     if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
                   1138:       nl_parse_addr(h, 1);
                   1139:     else
                   1140:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
                   1141: 
                   1142:   if_end_update();
                   1143: }
                   1144: 
                   1145: /*
                   1146:  *     Routes
                   1147:  */
                   1148: 
                   1149: static inline u32
                   1150: krt_table_id(struct krt_proto *p)
                   1151: {
                   1152:   return KRT_CF->sys.table_id;
                   1153: }
                   1154: 
                   1155: static HASH(struct krt_proto) nl_table_map;
                   1156: 
                   1157: #define RTH_KEY(p)             p->af, krt_table_id(p)
                   1158: #define RTH_NEXT(p)            p->sys.hash_next
                   1159: #define RTH_EQ(a1,i1,a2,i2)    a1 == a2 && i1 == i2
                   1160: #define RTH_FN(a,i)            a ^ u32_hash(i)
                   1161: 
                   1162: #define RTH_REHASH             rth_rehash
                   1163: #define RTH_PARAMS             /8, *2, 2, 2, 6, 20
                   1164: 
                   1165: HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
                   1166: 
                   1167: int
                   1168: krt_capable(rte *e)
                   1169: {
                   1170:   rta *a = e->attrs;
                   1171: 
                   1172:   switch (a->dest)
                   1173:   {
                   1174:     case RTD_UNICAST:
                   1175:     case RTD_BLACKHOLE:
                   1176:     case RTD_UNREACHABLE:
                   1177:     case RTD_PROHIBIT:
                   1178:       return 1;
                   1179: 
                   1180:     default:
                   1181:       return 0;
                   1182:   }
                   1183: }
                   1184: 
                   1185: static inline int
                   1186: nh_bufsize(struct nexthop *nh)
                   1187: {
                   1188:   int rv = 0;
                   1189:   for (; nh != NULL; nh = nh->next)
                   1190:     rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
                   1191:   return rv;
                   1192: }
                   1193: 
                   1194: static int
                   1195: nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
                   1196: {
                   1197:   eattr *ea;
                   1198:   net *net = e->net;
                   1199:   rta *a = e->attrs;
                   1200:   ea_list *eattrs = a->eattrs;
                   1201:   int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
                   1202:   u32 priority = 0;
                   1203: 
                   1204:   struct {
                   1205:     struct nlmsghdr h;
                   1206:     struct rtmsg r;
                   1207:     char buf[0];
                   1208:   } *r;
                   1209: 
                   1210:   int rsize = sizeof(*r) + bufsize;
                   1211:   r = alloca(rsize);
                   1212: 
                   1213:   DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
                   1214: 
                   1215:   bzero(&r->h, sizeof(r->h));
                   1216:   bzero(&r->r, sizeof(r->r));
                   1217:   r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
                   1218:   r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
                   1219:   r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
                   1220: 
                   1221:   r->r.rtm_family = p->af;
                   1222:   r->r.rtm_dst_len = net_pxlen(net->n.addr);
                   1223:   r->r.rtm_protocol = RTPROT_BIRD;
                   1224:   r->r.rtm_scope = RT_SCOPE_NOWHERE;
                   1225: #ifdef HAVE_MPLS_KERNEL
                   1226:   if (p->af == AF_MPLS)
                   1227:   {
                   1228:     /*
                   1229:      * Kernel MPLS code is a bit picky. We must:
                   1230:      * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
                   1231:      * 2) Never use RTA_PRIORITY
                   1232:      */
                   1233: 
                   1234:     u32 label = net_mpls(net->n.addr);
                   1235:     nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
                   1236:     r->r.rtm_scope = RT_SCOPE_UNIVERSE;
                   1237:     r->r.rtm_type = RTN_UNICAST;
                   1238:   }
                   1239:   else
                   1240: #endif
                   1241:   {
                   1242:     nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
                   1243: 
                   1244:     /* Add source address for IPv6 SADR routes */
                   1245:     if (net->n.addr->type == NET_IP6_SADR)
                   1246:     {
                   1247:       net_addr_ip6_sadr *a = (void *) &net->n.addr;
                   1248:       nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
                   1249:       r->r.rtm_src_len = a->src_pxlen;
                   1250:     }
                   1251:   }
                   1252: 
                   1253:   /*
                   1254:    * Strange behavior for RTM_DELROUTE:
                   1255:    * 1) rtm_family is ignored in IPv6, works for IPv4
                   1256:    * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
                   1257:    * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
                   1258:    */
                   1259: 
                   1260:   if (krt_table_id(p) < 256)
                   1261:     r->r.rtm_table = krt_table_id(p);
                   1262:   else
                   1263:     nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
                   1264: 
                   1265:   if (p->af == AF_MPLS)
                   1266:     priority = 0;
                   1267:   else if (a->source == RTS_DUMMY)
                   1268:     priority = e->u.krt.metric;
                   1269:   else if (KRT_CF->sys.metric)
                   1270:     priority = KRT_CF->sys.metric;
                   1271:   else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
                   1272:     priority = ea->u.data;
                   1273: 
                   1274:   if (priority)
                   1275:     nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
                   1276: 
                   1277:   /* For route delete, we do not specify remaining route attributes */
                   1278:   if (op == NL_OP_DELETE)
                   1279:     goto dest;
                   1280: 
                   1281:   /* Default scope is LINK for device routes, UNIVERSE otherwise */
                   1282:   if (p->af == AF_MPLS)
                   1283:     r->r.rtm_scope = RT_SCOPE_UNIVERSE;
                   1284:   else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
                   1285:     r->r.rtm_scope = ea->u.data;
                   1286:   else
                   1287:     r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
                   1288: 
                   1289:   if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
                   1290:     nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
                   1291: 
                   1292:   if (ea = ea_find(eattrs, EA_KRT_REALM))
                   1293:     nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
                   1294: 
                   1295: 
                   1296:   u32 metrics[KRT_METRICS_MAX];
                   1297:   metrics[0] = 0;
                   1298: 
                   1299:   struct ea_walk_state ews = { .eattrs = eattrs };
                   1300:   while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
                   1301:   {
                   1302:     int id = ea->id - EA_KRT_METRICS;
                   1303:     metrics[0] |= 1 << id;
                   1304:     metrics[id] = ea->u.data;
                   1305:   }
                   1306: 
                   1307:   if (metrics[0])
                   1308:     nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
                   1309: 
                   1310: 
                   1311: dest:
                   1312:   switch (dest)
                   1313:     {
                   1314:     case RTD_UNICAST:
                   1315:       r->r.rtm_type = RTN_UNICAST;
                   1316:       if (nh->next && !krt_ecmp6(p))
                   1317:        nl_add_multipath(&r->h, rsize, nh, p->af);
                   1318:       else
                   1319:       {
                   1320:        nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
                   1321:        nl_add_nexthop(&r->h, rsize, nh, p->af);
                   1322: 
                   1323:        if (nh->flags & RNF_ONLINK)
                   1324:          r->r.rtm_flags |= RTNH_F_ONLINK;
                   1325:       }
                   1326:       break;
                   1327:     case RTD_BLACKHOLE:
                   1328:       r->r.rtm_type = RTN_BLACKHOLE;
                   1329:       break;
                   1330:     case RTD_UNREACHABLE:
                   1331:       r->r.rtm_type = RTN_UNREACHABLE;
                   1332:       break;
                   1333:     case RTD_PROHIBIT:
                   1334:       r->r.rtm_type = RTN_PROHIBIT;
                   1335:       break;
                   1336:     case RTD_NONE:
                   1337:       break;
                   1338:     default:
                   1339:       bug("krt_capable inconsistent with nl_send_route");
                   1340:     }
                   1341: 
                   1342:   /* Ignore missing for DELETE */
                   1343:   return nl_exchange(&r->h, (op == NL_OP_DELETE));
                   1344: }
                   1345: 
                   1346: static inline int
                   1347: nl_add_rte(struct krt_proto *p, rte *e)
                   1348: {
                           /*
                            * Send route @e to the kernel as a new route. Returns the value of
                            * nl_send_route(); in the krt_ecmp6() branch the results of the
                            * individual requests are summed.
                            */
                   1349:   rta *a = e->attrs;
                   1350:   int err = 0;
                   1351: 
                           /*
                            * Multiple next hops on a krt_ecmp6() protocol: one request per next hop,
                            * the first as NL_OP_ADD, every following one as NL_OP_APPEND.
                            * (krt_ecmp6() is defined outside this view; presumably it selects the
                            * IPv6 ECMP handling -- confirm.)
                            */
                   1352:   if (krt_ecmp6(p) && a->nh.next)
                   1353:   {
                   1354:     struct nexthop *nh = &(a->nh);
                   1355: 
                   1356:     err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
                             /* Do not append further next hops if the initial ADD failed */
                   1357:     if (err < 0)
                   1358:       return err;
                   1359: 
                   1360:     for (nh = nh->next; nh; nh = nh->next)
                   1361:       err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
                   1362: 
                   1363:     return err;
                   1364:   }
                   1365: 
                           /* Otherwise a single ADD request using the route's own dest and next hop(s) */
                   1366:   return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
                   1367: }
                   1368: 
                   1369: static inline int
                   1370: nl_delete_rte(struct krt_proto *p, rte *e)
                   1371: {
                           /*
                            * Ask the kernel to delete route @e. The request is sent with RTD_NONE and
                            * no next hop. Returns the result of the last nl_send_route() call.
                            */
                   1372:   int err = 0;
                   1373: 
                   1374:   /* For IPv6, we just repeatedly request DELETE until we get error */
                           /*
                            * NOTE(review): the loop ends only when nl_send_route() returns non-zero,
                            * while nl_send_route() asks nl_exchange() to ignore a missing route for
                            * NL_OP_DELETE -- confirm what nl_exchange() returns in that case.
                            */
                   1375:   do
                   1376:     err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
                   1377:   while (krt_ecmp6(p) && !err);
                   1378: 
                   1379:   return err;
                   1380: }
                   1381: 
                   1382: static inline int
                   1383: nl_replace_rte(struct krt_proto *p, rte *e)
                   1384: {
                           /*
                            * Send route @e to the kernel as a single NL_OP_REPLACE request, using the
                            * route's own destination type and next hop(s). Returns the result of
                            * nl_send_route().
                            */
                   1385:   rta *a = e->attrs;
                   1386:   return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
                   1387: }
                   1388: 
                   1389: 
                   1390: void
                   1391: krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old)
                   1392: {
                           /*
                            * Update the kernel route for network @n: remove @old and/or install @new.
                            * Either of @old and @new may be NULL (both are checked below). The outcome
                            * is recorded in the KRF_SYNC_ERROR flag of @n; nothing is returned.
                            */
                   1393:   int err = 0;
                   1394: 
                   1395:   /*
                   1396:    * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for
                   1397:    * matching rtm_protocol, but that is OK when dedicated priority is used.
                   1398:    *
                   1399:    * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP
                   1400:    * and with some kernel versions ECMP replace crashes kernel. Would need more
                   1401:    * testing and checks for kernel versions.
                   1402:    *
                   1403:    * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the
                   1404:    * old route value, so we do not try to optimize IPv6 ECMP reconfigurations.
                   1405:    */
                   1406: 
                   1407:   if (krt_ipv4(p) && old && new)
                   1408:   {
                   1409:     err = nl_replace_rte(p, new);
                   1410:   }
                   1411:   else
                   1412:   {
                             /* The result of the delete is not checked; only the add affects err */
                   1413:     if (old)
                   1414:       nl_delete_rte(p, old);
                   1415: 
                   1416:     if (new)
                   1417:       err = nl_add_rte(p, new);
                   1418:   }
                   1419: 
                           /* Set the sync-error flag on a negative result, clear it otherwise */
                   1420:   if (err < 0)
                   1421:     n->n.flags |= KRF_SYNC_ERROR;
                   1422:   else
                   1423:     n->n.flags &= ~KRF_SYNC_ERROR;
                   1424: }
                   1425: 
                   1426: static int
                   1427: nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family)
                   1428: {
                           /*
                            * Decide whether a newly parsed route (described by @net, @p, @priority,
                            * @krt_type and @rtm_family) may be merged with the route currently stored
                            * in parse state @s. Returns 1 if it may, 0 otherwise.
                            */
                   1429:   /* Route merging is used for IPv6 scans */
                   1430:   if (!s->scan || (rtm_family != AF_INET6))
                   1431:     return 0;
                   1432: 
                   1433:   /* Saved and new route must have same network, proto/table, and priority */
                   1434:   if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
                   1435:     return 0;
                   1436: 
                   1437:   /* Both must be regular unicast routes */
                   1438:   if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
                   1439:     return 0;
                   1440: 
                   1441:   return 1;
                   1442: }
                   1443: 
                   1444: static void
                   1445: nl_announce_route(struct nl_parse_state *s)
                   1446: {
                           /*
                            * Hand the route stored in parse state @s over to the generic kernel
                            * protocol code and reset the state so the next route can be stored.
                            */
                   1447:   rte *e = rte_get_temp(s->attrs);
                   1448:   e->net = s->net;
                   1449:   e->u.krt.src = s->krt_src;
                   1450:   e->u.krt.proto = s->krt_proto;
                   1451:   e->u.krt.seen = 0;
                   1452:   e->u.krt.best = 0;
                   1453:   e->u.krt.metric = s->krt_metric;
                   1454: 
                           /* Scan results and asynchronous notifications use different entry points */
                   1455:   if (s->scan)
                   1456:     krt_got_route(s->proto, e);
                   1457:   else
                   1458:     krt_got_route_async(s->proto, e, s->new);
                   1459: 
                           /* Forget the stored route and flush the state's linpool */
                   1460:   s->net = NULL;
                   1461:   s->attrs = NULL;
                   1462:   s->proto = NULL;
                   1463:   lp_flush(s->pool);
                   1464: }
                   1465: 
                   1466: static inline void
                   1467: nl_parse_begin(struct nl_parse_state *s, int scan)
                   1468: {
                           /*
                            * Initialize parse state @s: zero all fields, attach the shared nl_linpool
                            * and remember whether parsing is done for a scan (@scan non-zero) or not.
                            */
                   1469:   memset(s, 0, sizeof (struct nl_parse_state));
                   1470:   s->pool = nl_linpool;
                   1471:   s->scan = scan;
                   1472: }
                   1473: 
                   1474: static inline void
                   1475: nl_parse_end(struct nl_parse_state *s)
                   1476: {
                           /* Finish parsing: announce the route still stored in @s, if there is one */
                   1477:   if (s->net)
                   1478:     nl_announce_route(s);
                   1479: }
                   1480: 
                   1481: 
                   1482: #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
                         /*
                          * SKIP() passes its printf-style arguments to DBG(), prefixed with
                          * "KRT: Ignoring route - ", and then returns from the enclosing function.
                          * The bare `return' means it can only be used in functions returning void.
                          */
                   1483: 
                   1484: static void
                   1485: nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
                   1486: {
                           /*
                            * Parse one route message @h (RTM_NEWROUTE, anything else is treated as a
                            * withdrawal) and convert it to BIRD route attributes.
                            *
                            * The resulting route is not announced right away. It is stored in parse
                            * state @s, or merged into the route already stored there when
                            * nl_mergable_route() allows it. A stored route is announced when a later
                            * non-mergable message is parsed, or by nl_parse_end().
                            *
                            * Messages that cannot be used are dropped by an early return: via SKIP()
                            * (debug message only), via log(L_ERR ...), or silently.
                            */
                   1487:   struct krt_proto *p;
                   1488:   struct rtmsg *i;
                   1489:   struct rtattr *a[BIRD_RTA_MAX];
                   1490:   int new = h->nlmsg_type == RTM_NEWROUTE;
                   1491: 
                   1492:   net_addr dst, src = {};
                   1493:   u32 oif = ~0;
                   1494:   u32 table_id;
                   1495:   u32 priority = 0;
                   1496:   u32 def_scope = RT_SCOPE_UNIVERSE;
                   1497:   int krt_src;
                   1498: 
                   1499:   if (!(i = nl_checkin(h, sizeof(*i))))
                   1500:     return;
                   1501: 
                           /*
                            * Parse the attributes with the per-family table and build the destination
                            * prefix. A missing RTA_DST yields the zero-length prefix (IPv4/IPv6).
                            * Other address families are ignored.
                            */
                   1502:   switch (i->rtm_family)
                   1503:     {
                   1504:     case AF_INET:
                   1505:       if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
                   1506:        return;
                   1507: 
                   1508:       if (a[RTA_DST])
                   1509:        net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
                   1510:       else
                   1511:        net_fill_ip4(&dst, IP4_NONE, 0);
                   1512:       break;
                   1513: 
                   1514:     case AF_INET6:
                   1515:       if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
                   1516:        return;
                   1517: 
                   1518:       if (a[RTA_DST])
                   1519:        net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
                   1520:       else
                   1521:        net_fill_ip6(&dst, IP6_NONE, 0);
                   1522: 
                               /* Source prefix; it is only used below when the channel is NET_IP6_SADR */
                   1523:       if (a[RTA_SRC])
                   1524:        net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
                   1525:       else
                   1526:        net_fill_ip6(&src, IP6_NONE, 0);
                   1527:       break;
                   1528: 
                   1529: #ifdef HAVE_MPLS_KERNEL
                   1530:     case AF_MPLS:
                   1531:       if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
                   1532:        return;
                   1533: 
                   1534:       if (!a[RTA_DST])
                   1535:        SKIP("MPLS route without RTA_DST");
                   1536: 
                               /* Only a destination consisting of exactly one label is accepted */
                   1537:       if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
                   1538:        SKIP("MPLS route with multi-label RTA_DST");
                   1539: 
                   1540:       net_fill_mpls(&dst, rta_mpls_stack[0]);
                   1541:       break;
                   1542: #endif
                   1543: 
                   1544:     default:
                   1545:       return;
                   1546:     }
                   1547: 
                           /* Without RTA_OIF, oif keeps its initial value ~0 */
                   1548:   if (a[RTA_OIF])
                   1549:     oif = rta_get_u32(a[RTA_OIF]);
                   1550: 
                           /* The RTA_TABLE attribute, when present, overrides the rtm_table header field */
                   1551:   if (a[RTA_TABLE])
                   1552:     table_id = rta_get_u32(a[RTA_TABLE]);
                   1553:   else
                   1554:     table_id = i->rtm_table;
                   1555: 
                   1556:   /* Do we know this table? */
                   1557:   p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
                   1558:   if (!p)
                   1559:     SKIP("unknown table %u\n", table_id);
                   1560: 
                   1561:   if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
                   1562:     SKIP("src prefix for non-SADR channel\n");
                   1563: 
                   1564:   if (a[RTA_IIF])
                   1565:     SKIP("IIF set\n");
                   1566: 
                   1567:   if (i->rtm_tos != 0)                 /* We don't support TOS */
                   1568:     SKIP("TOS %02x\n", i->rtm_tos);
                   1569: 
                   1570:   if (s->scan && !new)
                   1571:     SKIP("RTM_DELROUTE in scan\n");
                   1572: 
                           /* Without RTA_PRIORITY, priority stays 0 */
                   1573:   if (a[RTA_PRIORITY])
                   1574:     priority = rta_get_u32(a[RTA_PRIORITY]);
                   1575: 
                   1576:   int c = net_classify(&dst);
                   1577:   if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
                   1578:     SKIP("strange class/scope\n");
                   1579: 
                           /* Map the kernel's route origin (rtm_protocol) to a KRT_SRC_* value */
                   1580:   switch (i->rtm_protocol)
                   1581:     {
                   1582:     case RTPROT_UNSPEC:
                   1583:       SKIP("proto unspec\n");
                   1584: 
                   1585:     case RTPROT_REDIRECT:
                   1586:       krt_src = KRT_SRC_REDIRECT;
                   1587:       break;
                   1588: 
                             /*
                              * RTPROT_KERNEL routes are dropped here without a SKIP() message.
                              * NOTE(review): the krt_src assignment before the return has no effect.
                              */
                   1589:     case RTPROT_KERNEL:
                   1590:       krt_src = KRT_SRC_KERNEL;
                   1591:       return;
                   1592: 
                             /* Routes marked RTPROT_BIRD are accepted only during a scan */
                   1593:     case RTPROT_BIRD:
                   1594:       if (!s->scan)
                   1595:        SKIP("echo\n");
                   1596:       krt_src = KRT_SRC_BIRD;
                   1597:       break;
                   1598: 
                   1599:     case RTPROT_BOOT:
                   1600:     default:
                   1601:       krt_src = KRT_SRC_ALIEN;
                   1602:     }
                   1603: 
                           /*
                            * For a SADR channel, build a combined destination + source address in
                            * stack memory (alloca); otherwise the destination prefix is used as is.
                            */
                   1604:   net_addr *n = &dst;
                   1605:   if (p->p.net_type == NET_IP6_SADR)
                   1606:   {
                   1607:     n = alloca(sizeof(net_addr_ip6_sadr));
                   1608:     net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
                   1609:                      net6_prefix(&src), net6_pxlen(&src));
                   1610:   }
                   1611: 
                   1612:   net *net = net_get(p->p.main_channel->table, n);
                   1613: 
                           /* Announce the previously stored route unless this one can be merged into it */
                   1614:   if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
                   1615:     nl_announce_route(s);
                   1616: 
                           /* Zeroed route attributes for this message, allocated from the state's linpool */
                   1617:   rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
                   1618:   ra->src = p->p.main_source;
                   1619:   ra->source = RTS_INHERIT;
                   1620:   ra->scope = SCOPE_UNIVERSE;
                   1621: 
                           /* Translate the kernel route type to a BIRD destination type */
                   1622:   switch (i->rtm_type)
                   1623:     {
                   1624:     case RTN_UNICAST:
                   1625:       ra->dest = RTD_UNICAST;
                   1626: 
                               /* Multipath route: next hops come from RTA_MULTIPATH, nothing else is read here */
                   1627:       if (a[RTA_MULTIPATH])
                   1628:         {
                   1629:          struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
                   1630:          if (!nh)
                   1631:            {
                   1632:              log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
                   1633:              return;
                   1634:            }
                   1635: 
                   1636:          nexthop_link(ra, nh);
                   1637:          break;
                   1638:        }
                   1639: 
                               /* Single next hop: the outgoing interface must be known */
                   1640:       ra->nh.iface = if_find_by_index(oif);
                   1641:       if (!ra->nh.iface)
                   1642:        {
                   1643:          log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
                   1644:          return;
                   1645:        }
                   1646: 
                               /* Gateway: RTA_GATEWAY for non-MPLS families, RTA_VIA for MPLS */
                   1647:       if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY]
                   1648: #ifdef HAVE_MPLS_KERNEL
                   1649:          || (i->rtm_family == AF_MPLS) && a[RTA_VIA]
                   1650: #endif
                   1651:          )
                   1652:        {
                   1653: #ifdef HAVE_MPLS_KERNEL
                   1654:          if (i->rtm_family == AF_MPLS)
                   1655:            ra->nh.gw = rta_get_via(a[RTA_VIA]);
                   1656:          else
                   1657: #endif
                   1658:            ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
                   1659: 
                   1660:          /* Silently skip strange 6to4 routes */
                   1661:          const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
                   1662:          if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
                   1663:            return;
                   1664: 
                   1665:          if (i->rtm_flags & RTNH_F_ONLINK)
                   1666:            ra->nh.flags |= RNF_ONLINK;
                   1667: 
                                   /* Reject the route if the gateway has no neighbor entry or the entry has host scope */
                   1668:          neighbor *nbr;
                   1669:          nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
                   1670:                           (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
                   1671:          if (!nbr || (nbr->scope == SCOPE_HOST))
                   1672:            {
                   1673:              log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
                   1674:                   ra->nh.gw);
                   1675:              return;
                   1676:            }
                   1677:        }
                   1678: 
                   1679:       break;
                   1680:     case RTN_BLACKHOLE:
                   1681:       ra->dest = RTD_BLACKHOLE;
                   1682:       break;
                   1683:     case RTN_UNREACHABLE:
                   1684:       ra->dest = RTD_UNREACHABLE;
                   1685:       break;
                   1686:     case RTN_PROHIBIT:
                   1687:       ra->dest = RTD_PROHIBIT;
                   1688:       break;
                   1689:     /* FIXME: What about RTN_THROW? */
                   1690:     default:
                   1691:       SKIP("type %d\n", i->rtm_type);
                   1692:       return;
                   1693:     }
                   1694: 
                   1695: #ifdef HAVE_MPLS_KERNEL
                           /* Outgoing label stack, read only when the route has a single next hop */
                   1696:   if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
                   1697:     ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
                   1698: 
                   1699:   if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
                   1700:     {
                   1701:       switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
                   1702:        {
                   1703:          case LWTUNNEL_ENCAP_MPLS:
                   1704:            {
                                     /*
                                      * The nested attributes inside RTA_ENCAP are parsed with the same
                                      * helper; nl_attr_len is set to the payload length beforehand.
                                      * NOTE(review): the result of nl_parse_attrs() is not checked and
                                      * enca[RTA_DST] is used without a NULL test -- confirm that
                                      * encap_mpls_want guarantees the attribute is present.
                                      */
                   1705:              struct rtattr *enca[BIRD_RTA_MAX];
                   1706:              nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
                   1707:              nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
                   1708:              ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
                   1709:              break;
                   1710:            }
                   1711:          default:
                   1712:            SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
                   1713:            break;
                   1714:        }
                   1715:     }
                   1716: #endif
                   1717: 
                           /*
                            * Optional kernel attributes follow. Each of the next three is stored as a
                            * one-element ea_list that is prepended to ra->eattrs.
                            */
                   1718:   if (i->rtm_scope != def_scope)
                   1719:     {
                   1720:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
                   1721:       ea->next = ra->eattrs;
                   1722:       ra->eattrs = ea;
                   1723:       ea->flags = EALF_SORTED;
                   1724:       ea->count = 1;
                   1725:       ea->attrs[0].id = EA_KRT_SCOPE;
                   1726:       ea->attrs[0].flags = 0;
                   1727:       ea->attrs[0].type = EAF_TYPE_INT;
                   1728:       ea->attrs[0].u.data = i->rtm_scope;
                   1729:     }
                   1730: 
                   1731:   if (a[RTA_PREFSRC])
                   1732:     {
                   1733:       ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
                   1734: 
                   1735:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
                   1736:       ea->next = ra->eattrs;
                   1737:       ra->eattrs = ea;
                   1738:       ea->flags = EALF_SORTED;
                   1739:       ea->count = 1;
                   1740:       ea->attrs[0].id = EA_KRT_PREFSRC;
                   1741:       ea->attrs[0].flags = 0;
                   1742:       ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
                   1743: 
                               /* The address itself is copied into a separately allocated adata block */
                   1744:       struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
                   1745:       ad->length = sizeof(ps);
                   1746:       memcpy(ad->data, &ps, sizeof(ps));
                   1747: 
                   1748:       ea->attrs[0].u.ptr = ad;
                   1749:     }
                   1750: 
                   1751:   if (a[RTA_FLOW])
                   1752:     {
                   1753:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
                   1754:       ea->next = ra->eattrs;
                   1755:       ra->eattrs = ea;
                   1756:       ea->flags = EALF_SORTED;
                   1757:       ea->count = 1;
                   1758:       ea->attrs[0].id = EA_KRT_REALM;
                   1759:       ea->attrs[0].flags = 0;
                   1760:       ea->attrs[0].type = EAF_TYPE_INT;
                   1761:       ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
                   1762:     }
                   1763: 
                           /*
                            * Kernel metrics: metrics[0] is used as a bitmask telling which of the
                            * entries metrics[1..KRT_METRICS_MAX-1] are present; each present entry
                            * becomes one attribute. The list is linked in only if it is non-empty.
                            */
                   1764:   if (a[RTA_METRICS])
                   1765:     {
                   1766:       u32 metrics[KRT_METRICS_MAX];
                   1767:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
                   1768:       int t, n = 0;
                   1769: 
                   1770:       if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
                   1771:         {
                   1772:          log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
                   1773:          return;
                   1774:        }
                   1775: 
                   1776:       for (t = 1; t < KRT_METRICS_MAX; t++)
                   1777:        if (metrics[0] & (1 << t))
                   1778:          {
                   1779:            ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
                   1780:            ea->attrs[n].flags = 0;
                   1781:            ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
                   1782:            ea->attrs[n].u.data = metrics[t];
                   1783:            n++;
                   1784:          }
                   1785: 
                   1786:       if (n > 0)
                   1787:         {
                   1788:          ea->next = ra->eattrs;
                   1789:          ea->flags = EALF_SORTED;
                   1790:          ea->count = n;
                   1791:          ra->eattrs = ea;
                   1792:        }
                   1793:     }
                   1794: 
                   1795:   /*
                   1796:    * Ideally, now we would send the received route to the rest of kernel code.
                   1797:    * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
                   1798:    * postpone it and merge next hops until the end of the sequence. Note that
                   1799:    * when doing merging of next hops, we expect the new route to be unipath.
                   1800:    * Otherwise, we ignore additional next hops in nexthop_insert().
                   1801:    */
                   1802: 
                           /*
                            * s->net is NULL here either because nothing was stored yet or because
                            * the stored route was announced above (nl_announce_route() clears it).
                            */
                   1803:   if (!s->net)
                   1804:   {
                   1805:     /* Store the new route */
                   1806:     s->net = net;
                   1807:     s->attrs = ra;
                   1808:     s->proto = p;
                   1809:     s->new = new;
                   1810:     s->krt_src = krt_src;
                   1811:     s->krt_type = i->rtm_type;
                   1812:     s->krt_proto = i->rtm_protocol;
                   1813:     s->krt_metric = priority;
                   1814:   }
                   1815:   else
                   1816:   {
                   1817:     /* Merge next hops with the stored route */
                   1818:     rta *oa = s->attrs;
                   1819: 
                   1820:     struct nexthop *nhs = &oa->nh;
                   1821:     nexthop_insert(&nhs, &ra->nh);
                   1822: 
                   1823:     /* Perhaps new nexthop is inserted at the first position */
                   1824:     if (nhs == &ra->nh)
                   1825:     {
                   1826:       /* Swap rtas */
                   1827:       s->attrs = ra;
                   1828: 
                   1829:       /* Keep old eattrs */
                   1830:       ra->eattrs = oa->eattrs;
                   1831:     }
                   1832:   }
                   1833: }
                   1834: 
                   1835: void
                   1836: krt_do_scan(struct krt_proto *p UNUSED)        /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
                   1837: {
                           /*
                            * Scan kernel routes: request a route dump for all address families
                            * (AF_UNSPEC) and parse every message returned by nl_get_scan().
                            * A single scan-mode parse state is shared by all messages, so that
                            * nl_parse_route() can merge consecutive messages. @p is not used.
                            */
                   1838:   struct nlmsghdr *h;
                   1839:   struct nl_parse_state s;
                   1840: 
                   1841:   nl_parse_begin(&s, 1);
                   1842:   nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
                   1843:   while (h = nl_get_scan())
                   1844:     if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
                   1845:       nl_parse_route(&s, h);
                   1846:     else
                   1847:       log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
                           /* Announce the last route still held in the parse state */
                   1848:   nl_parse_end(&s);
                   1849: }
                   1850: 
                   1851: /*
                   1852:  *     Asynchronous Netlink interface
                   1853:  */
                   1854: 
                   1855: static sock *nl_async_sk;              /* BIRD socket for asynchronous notifications */
                   1856: static byte *nl_async_rx_buffer;       /* Receive buffer */
                         /* nl_async_hook() hands nl_async_rx_buffer to recvmsg() with length NL_RX_SIZE. */
                   1857: 
                   1858: static void
                   1859: nl_async_msg(struct nlmsghdr *h)
                   1860: {
                           /*
                            * Dispatch one asynchronous netlink message @h according to its type.
                            * Unknown types are only reported via DBG().
                            */
                   1861:   struct nl_parse_state s;
                   1862: 
                   1863:   switch (h->nlmsg_type)
                   1864:     {
                   1865:     case RTM_NEWROUTE:
                   1866:     case RTM_DELROUTE:
                   1867:       DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
                               /* Each message gets its own non-scan parse state and is announced at once */
                   1868:       nl_parse_begin(&s, 0);
                   1869:       nl_parse_route(&s, h);
                   1870:       nl_parse_end(&s);
                   1871:       break;
                   1872:     case RTM_NEWLINK:
                   1873:     case RTM_DELLINK:
                   1874:       DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
                               /* Link and address messages are parsed only while kif_proto is set */
                   1875:       if (kif_proto)
                   1876:        nl_parse_link(h, 0);
                   1877:       break;
                   1878:     case RTM_NEWADDR:
                   1879:     case RTM_DELADDR:
                   1880:       DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
                   1881:       if (kif_proto)
                   1882:        nl_parse_addr(h, 0);
                   1883:       break;
                   1884:     default:
                   1885:       DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
                   1886:     }
                   1887: }
                   1888: 
                   1889: static int
                   1890: nl_async_hook(sock *sk, uint size UNUSED)
                   1891: {
                           /*
                            * Receive one datagram from the asynchronous netlink socket @sk into
                            * nl_async_rx_buffer and process every netlink message it contains.
                            * @size is not used.
                            *
                            * Returns 0 when recvmsg() fails with an error other than ENOBUFS
                            * (this includes EWOULDBLOCK, which is not logged), and 1 in all other
                            * cases, including when the datagram was ignored.
                            */
                   1892:   struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
                   1893:   struct sockaddr_nl sa;
                   1894:   struct msghdr m = {
                   1895:     .msg_name = &sa,
                   1896:     .msg_namelen = sizeof(sa),
                   1897:     .msg_iov = &iov,
                   1898:     .msg_iovlen = 1,
                   1899:   };
                   1900:   struct nlmsghdr *h;
                   1901:   int x;
                   1902:   uint len;
                   1903: 
                   1904:   x = recvmsg(sk->fd, &m, 0);
                   1905:   if (x < 0)
                   1906:     {
                   1907:       if (errno == ENOBUFS)
                   1908:        {
                   1909:          /*
                   1910:           *  Netlink reports some packets have been thrown away.
                   1911:           *  One day we might react to it by asking for route table
                   1912:           *  scan in near future.
                   1913:           */
                   1914:          log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
                   1915:          return 1;     /* More data are likely to be ready */
                   1916:        }
                   1917:       else if (errno != EWOULDBLOCK)
                   1918:        log(L_ERR "Netlink recvmsg: %m");
                   1919:       return 0;
                   1920:     }
                   1921:   if (sa.nl_pid)               /* It isn't from the kernel */
                   1922:     {
                   1923:       DBG("Non-kernel packet\n");
                   1924:       return 1;
                   1925:     }
                   1926:   h = (void *) nl_async_rx_buffer;
                   1927:   len = x;
                           /* A truncated datagram is dropped as a whole, nothing in it is parsed */
                   1928:   if (m.msg_flags & MSG_TRUNC)
                   1929:     {
                   1930:       log(L_WARN "Netlink got truncated asynchronous message");
                   1931:       return 1;
                   1932:     }
                           /* Walk all messages in the datagram; NLMSG_NEXT() advances h and reduces len */
                   1933:   while (NLMSG_OK(h, len))
                   1934:     {
                   1935:       nl_async_msg(h);
                   1936:       h = NLMSG_NEXT(h, len);
                   1937:     }
                           /*
                            * Bytes left over after the last complete message are only reported.
                            * NOTE(review): len is a uint but is printed with %d -- confirm this is
                            * acceptable for log()'s formatter.
                            */
                   1938:   if (len)
                   1939:     log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
                   1940:   return 1;
                   1941: }
                   1942: 
                   1943: static void
                   1944: nl_async_err_hook(sock *sk, int e UNUSED)
                   1945: {
                   1946:   nl_async_hook(sk, 0);
                   1947: }
                   1948: 
                   1949: static void
                   1950: nl_open_async(void)
                   1951: {
                   1952:   sock *sk;
                   1953:   struct sockaddr_nl sa;
                   1954:   int fd;
                   1955: 
                   1956:   if (nl_async_sk)
                   1957:     return;
                   1958: 
                   1959:   DBG("KRT: Opening async netlink socket\n");
                   1960: 
                   1961:   fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
                   1962:   if (fd < 0)
                   1963:     {
                   1964:       log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
                   1965:       return;
                   1966:     }
                   1967: 
                   1968:   bzero(&sa, sizeof(sa));
                   1969:   sa.nl_family = AF_NETLINK;
                   1970:   sa.nl_groups = RTMGRP_LINK |
                   1971:     RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
                   1972:     RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
                   1973: 
                   1974:   if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
                   1975:     {
                   1976:       log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
                   1977:       close(fd);
                   1978:       return;
                   1979:     }
                   1980: 
                   1981:   nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
                   1982: 
                   1983:   sk = nl_async_sk = sk_new(krt_pool);
                   1984:   sk->type = SK_MAGIC;
                   1985:   sk->rx_hook = nl_async_hook;
                   1986:   sk->err_hook = nl_async_err_hook;
                   1987:   sk->fd = fd;
                   1988:   if (sk_open(sk) < 0)
                   1989:     bug("Netlink: sk_open failed");
                   1990: }
                   1991: 
                   1992: 
                   1993: /*
                   1994:  *     Interface to the UNIX krt module
                   1995:  */
                   1996: 
                   1997: void
                   1998: krt_sys_io_init(void)
                   1999: {
                   2000:   nl_linpool = lp_new_default(krt_pool);
                   2001:   HASH_INIT(nl_table_map, krt_pool, 6);
                   2002: }
                   2003: 
                   2004: int
                   2005: krt_sys_start(struct krt_proto *p)
                   2006: {
                   2007:   struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
                   2008: 
                   2009:   if (old)
                   2010:     {
                   2011:       log(L_ERR "%s: Kernel table %u already registered by %s",
                   2012:          p->p.name, krt_table_id(p), old->p.name);
                   2013:       return 0;
                   2014:     }
                   2015: 
                   2016:   HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
                   2017: 
                   2018:   nl_open();
                   2019:   nl_open_async();
                   2020: 
                   2021:   return 1;
                   2022: }
                   2023: 
                   2024: void
                   2025: krt_sys_shutdown(struct krt_proto *p)
                   2026: {
                   2027:   HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
                   2028: }
                   2029: 
                   2030: int
                   2031: krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
                   2032: {
                   2033:   return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
                   2034: }
                   2035: 
                   2036: void
                   2037: krt_sys_init_config(struct krt_config *cf)
                   2038: {
                   2039:   cf->sys.table_id = RT_TABLE_MAIN;
                   2040:   cf->sys.metric = 32;
                   2041: }
                   2042: 
                   2043: void
                   2044: krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
                   2045: {
                   2046:   d->sys.table_id = s->sys.table_id;
                   2047:   d->sys.metric = s->sys.metric;
                   2048: }
                   2049: 
                   2050: static const char *krt_metrics_names[KRT_METRICS_MAX] = {
                   2051:   NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
                   2052:   "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
                   2053: };
                   2054: 
                   2055: static const char *krt_features_names[KRT_FEATURES_MAX] = {
                   2056:   "ecn", NULL, NULL, "allfrag"
                   2057: };
                   2058: 
                   2059: int
                   2060: krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
                   2061: {
                   2062:   switch (a->id)
                   2063:   {
                   2064:   case EA_KRT_PREFSRC:
                   2065:     bsprintf(buf, "prefsrc");
                   2066:     return GA_NAME;
                   2067: 
                   2068:   case EA_KRT_REALM:
                   2069:     bsprintf(buf, "realm");
                   2070:     return GA_NAME;
                   2071: 
                   2072:   case EA_KRT_SCOPE:
                   2073:     bsprintf(buf, "scope");
                   2074:     return GA_NAME;
                   2075: 
                   2076:   case EA_KRT_LOCK:
                   2077:     buf += bsprintf(buf, "lock:");
                   2078:     ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
                   2079:     return GA_FULL;
                   2080: 
                   2081:   case EA_KRT_FEATURES:
                   2082:     buf += bsprintf(buf, "features:");
                   2083:     ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
                   2084:     return GA_FULL;
                   2085: 
                   2086:   default:;
                   2087:     int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
                   2088:     if (id > 0 && id < KRT_METRICS_MAX)
                   2089:     {
                   2090:       bsprintf(buf, "%s", krt_metrics_names[id]);
                   2091:       return GA_NAME;
                   2092:     }
                   2093: 
                   2094:     return GA_UNKNOWN;
                   2095:   }
                   2096: }
                   2097: 
                   2098: 
                   2099: 
                   2100: void
                   2101: kif_sys_start(struct kif_proto *p UNUSED)
                   2102: {
                   2103:   nl_open();
                   2104:   nl_open_async();
                   2105: }
                   2106: 
                   2107: void
                   2108: kif_sys_shutdown(struct kif_proto *p UNUSED)
                   2109: {
                   2110: }
                   2111: 
                   2112: int
                   2113: kif_update_sysdep_addr(struct iface *i UNUSED)
                   2114: {
                   2115:   return 0;
                   2116: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>