Annotation of embedaddon/bird/sysdep/linux/netlink.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  *     BIRD -- Linux Netlink Interface
                      3:  *
                      4:  *     (c) 1999--2000 Martin Mares <mj@ucw.cz>
                      5:  *
                      6:  *     Can be freely distributed and used under the terms of the GNU GPL.
                      7:  */
                      8: 
                      9: #include <stdio.h>
                     10: #include <unistd.h>
                     11: #include <fcntl.h>
                     12: #include <sys/socket.h>
                     13: #include <sys/uio.h>
                     14: #include <errno.h>
                     15: 
                     16: #undef LOCAL_DEBUG
                     17: 
                     18: #include "nest/bird.h"
                     19: #include "nest/route.h"
                     20: #include "nest/protocol.h"
                     21: #include "nest/iface.h"
                     22: #include "lib/timer.h"
                     23: #include "lib/unix.h"
                     24: #include "lib/krt.h"
                     25: #include "lib/socket.h"
                     26: #include "lib/string.h"
                     27: #include "lib/hash.h"
                     28: #include "conf/conf.h"
                     29: 
                     30: #include <asm/types.h>
                     31: #include <linux/if.h>
                     32: #include <linux/netlink.h>
                     33: #include <linux/rtnetlink.h>
                     34: 
                     35: 
                     36: #ifndef MSG_TRUNC                      /* Hack: Several versions of glibc miss this one :( */
                     37: #define MSG_TRUNC 0x20
                     38: #endif
                     39: 
                     40: #ifndef IFA_FLAGS
                     41: #define IFA_FLAGS 8
                     42: #endif
                     43: 
                     44: #ifndef IFF_LOWER_UP
                     45: #define IFF_LOWER_UP 0x10000
                     46: #endif
                     47: 
                     48: #ifndef RTA_TABLE
                     49: #define RTA_TABLE  15
                     50: #endif
                     51: 
                     52: 
                     53: #ifdef IPV6
                     54: #define krt_ecmp6(X) 1
                     55: #else
                     56: #define krt_ecmp6(X) 0
                     57: #endif
                     58: 
                     59: /*
                     60:  * Structure nl_parse_state keeps state of received route processing. Ideally,
                     61:  * we could just independently parse received Netlink messages and immediately
                     62:  * propagate received routes to the rest of BIRD, but Linux kernel represents
                     63:  * and announces IPv6 ECMP routes not as one route with multiple next hops (like
                     64:  * RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix.
                     65:  *
                     66:  * Therefore, BIRD keeps currently processed route in nl_parse_state structure
                     67:  * and postpones its propagation until we expect it to be final; i.e., when
                     68:  * non-matching route is received or when the scan ends. When another matching
                     69:  * route is received, it is merged with the already processed route to form an
                     70:  * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
                     71:  * postponing is done in both cases (for simplicity). All IPv4 routes are just
                     72:  * considered non-matching.
                     73:  *
                     74:  * This is ignored for asynchronous notifications (every notification is handled
                     75:  * as a separate route). It is not an issue for our routes, as we ignore such
                     76:  * notifications anyways. But importing alien IPv6 ECMP routes does not work
                     77:  * properly.
                     78:  */
                     79: 
                     80: struct nl_parse_state
                     81: {
                     82:   struct linpool *pool;
                     83:   int scan;
                     84:   int merge;
                     85: 
                     86:   net *net;
                     87:   rta *attrs;
                     88:   struct krt_proto *proto;
                     89:   s8 new;
                     90:   s8 krt_src;
                     91:   u8 krt_type;
                     92:   u8 krt_proto;
                     93:   u32 krt_metric;
                     94: };
                     95: 
                     96: /*
                     97:  *     Synchronous Netlink interface
                     98:  */
                     99: 
                    100: struct nl_sock
                    101: {
                    102:   int fd;
                    103:   u32 seq;
                    104:   byte *rx_buffer;                     /* Receive buffer */
                    105:   struct nlmsghdr *last_hdr;           /* Recently received packet */
                    106:   uint last_size;
                    107: };
                    108: 
                    109: #define NL_RX_SIZE 8192
                    110: 
                    111: #define NL_OP_DELETE   0
                    112: #define NL_OP_ADD      (NLM_F_CREATE|NLM_F_EXCL)
                    113: #define NL_OP_REPLACE  (NLM_F_CREATE|NLM_F_REPLACE)
                    114: #define NL_OP_APPEND   (NLM_F_CREATE|NLM_F_APPEND)
                    115: 
                    116: static linpool *nl_linpool;
                    117: 
                    118: static struct nl_sock nl_scan = {.fd = -1};    /* Netlink socket for synchronous scan */
                    119: static struct nl_sock nl_req  = {.fd = -1};    /* Netlink socket for requests */
                    120: 
                    121: static void
                    122: nl_open_sock(struct nl_sock *nl)
                    123: {
                    124:   if (nl->fd < 0)
                    125:     {
                    126:       nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
                    127:       if (nl->fd < 0)
                    128:        die("Unable to open rtnetlink socket: %m");
                    129:       nl->seq = now;
                    130:       nl->rx_buffer = xmalloc(NL_RX_SIZE);
                    131:       nl->last_hdr = NULL;
                    132:       nl->last_size = 0;
                    133:     }
                    134: }
                    135: 
                    136: static void
                    137: nl_open(void)
                    138: {
                    139:   nl_open_sock(&nl_scan);
                    140:   nl_open_sock(&nl_req);
                    141: }
                    142: 
                    143: static void
                    144: nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
                    145: {
                    146:   struct sockaddr_nl sa;
                    147: 
                    148:   memset(&sa, 0, sizeof(sa));
                    149:   sa.nl_family = AF_NETLINK;
                    150:   nh->nlmsg_pid = 0;
                    151:   nh->nlmsg_seq = ++(nl->seq);
                    152:   if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
                    153:     die("rtnetlink sendto: %m");
                    154:   nl->last_hdr = NULL;
                    155: }
                    156: 
                    157: static void
                    158: nl_request_dump(int af, int cmd)
                    159: {
                    160:   struct {
                    161:     struct nlmsghdr nh;
                    162:     struct rtgenmsg g;
                    163:   } req = {
                    164:     .nh.nlmsg_type = cmd,
                    165:     .nh.nlmsg_len = sizeof(req),
                    166:     .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
                    167:     .g.rtgen_family = af
                    168:   };
                    169:   nl_send(&nl_scan, &req.nh);
                    170: }
                    171: 
                    172: static struct nlmsghdr *
                    173: nl_get_reply(struct nl_sock *nl)
                    174: {
                    175:   for(;;)
                    176:     {
                    177:       if (!nl->last_hdr)
                    178:        {
                    179:          struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
                    180:          struct sockaddr_nl sa;
                    181:          struct msghdr m = {
                    182:            .msg_name = &sa,
                    183:            .msg_namelen = sizeof(sa),
                    184:            .msg_iov = &iov,
                    185:            .msg_iovlen = 1,
                    186:          };
                    187:          int x = recvmsg(nl->fd, &m, 0);
                    188:          if (x < 0)
                    189:            die("nl_get_reply: %m");
                    190:          if (sa.nl_pid)                /* It isn't from the kernel */
                    191:            {
                    192:              DBG("Non-kernel packet\n");
                    193:              continue;
                    194:            }
                    195:          nl->last_size = x;
                    196:          nl->last_hdr = (void *) nl->rx_buffer;
                    197:          if (m.msg_flags & MSG_TRUNC)
                    198:            bug("nl_get_reply: got truncated reply which should be impossible");
                    199:        }
                    200:       if (NLMSG_OK(nl->last_hdr, nl->last_size))
                    201:        {
                    202:          struct nlmsghdr *h = nl->last_hdr;
                    203:          nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
                    204:          if (h->nlmsg_seq != nl->seq)
                    205:            {
                    206:              log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
                    207:                  h->nlmsg_seq, nl->seq);
                    208:              continue;
                    209:            }
                    210:          return h;
                    211:        }
                    212:       if (nl->last_size)
                    213:        log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
                    214:       nl->last_hdr = NULL;
                    215:     }
                    216: }
                    217: 
                    218: static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
                    219: 
                    220: static int
                    221: nl_error(struct nlmsghdr *h, int ignore_esrch)
                    222: {
                    223:   struct nlmsgerr *e;
                    224:   int ec;
                    225: 
                    226:   if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
                    227:     {
                    228:       log(L_WARN "Netlink: Truncated error message received");
                    229:       return ENOBUFS;
                    230:     }
                    231:   e = (struct nlmsgerr *) NLMSG_DATA(h);
                    232:   ec = -e->error;
                    233:   if (ec && !(ignore_esrch && (ec == ESRCH)))
                    234:     log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
                    235:   return ec;
                    236: }
                    237: 
                    238: static struct nlmsghdr *
                    239: nl_get_scan(void)
                    240: {
                    241:   struct nlmsghdr *h = nl_get_reply(&nl_scan);
                    242: 
                    243:   if (h->nlmsg_type == NLMSG_DONE)
                    244:     return NULL;
                    245:   if (h->nlmsg_type == NLMSG_ERROR)
                    246:     {
                    247:       nl_error(h, 0);
                    248:       return NULL;
                    249:     }
                    250:   return h;
                    251: }
                    252: 
                    253: static int
                    254: nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
                    255: {
                    256:   struct nlmsghdr *h;
                    257: 
                    258:   nl_send(&nl_req, pkt);
                    259:   for(;;)
                    260:     {
                    261:       h = nl_get_reply(&nl_req);
                    262:       if (h->nlmsg_type == NLMSG_ERROR)
                    263:        break;
                    264:       log(L_WARN "nl_exchange: Unexpected reply received");
                    265:     }
                    266:   return nl_error(h, ignore_esrch) ? -1 : 0;
                    267: }
                    268: 
                    269: /*
                    270:  *     Netlink attributes
                    271:  */
                    272: 
                    273: static int nl_attr_len;
                    274: 
                    275: static void *
                    276: nl_checkin(struct nlmsghdr *h, int lsize)
                    277: {
                    278:   nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
                    279:   if (nl_attr_len < 0)
                    280:     {
                    281:       log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
                    282:       return NULL;
                    283:     }
                    284:   return NLMSG_DATA(h);
                    285: }
                    286: 
                    287: struct nl_want_attrs {
                    288:   u8 defined:1;
                    289:   u8 checksize:1;
                    290:   u8 size;
                    291: };
                    292: 
                    293: 
                    294: #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
                    295: 
                    296: static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
                    297:   [IFLA_IFNAME]          = { 1, 0, 0 },
                    298:   [IFLA_MTU]     = { 1, 1, sizeof(u32) },
                    299:   [IFLA_WIRELESS] = { 1, 0, 0 },
                    300: };
                    301: 
                    302: 
                    303: #define BIRD_IFA_MAX  (IFA_FLAGS+1)
                    304: 
                    305: #ifndef IPV6
                    306: static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
                    307:   [IFA_ADDRESS]          = { 1, 1, sizeof(ip4_addr) },
                    308:   [IFA_LOCAL]    = { 1, 1, sizeof(ip4_addr) },
                    309:   [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
                    310:   [IFA_FLAGS]    = { 1, 1, sizeof(u32) },
                    311: };
                    312: #else
                    313: static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
                    314:   [IFA_ADDRESS]          = { 1, 1, sizeof(ip6_addr) },
                    315:   [IFA_LOCAL]    = { 1, 1, sizeof(ip6_addr) },
                    316:   [IFA_FLAGS]    = { 1, 1, sizeof(u32) },
                    317: };
                    318: #endif
                    319: 
                    320: 
                    321: #define BIRD_RTA_MAX  (RTA_TABLE+1)
                    322: 
                    323: static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
                    324:   [RTA_GATEWAY]          = { 1, 1, sizeof(ip4_addr) },
                    325: };
                    326: 
                    327: #ifndef IPV6
                    328: static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
                    329:   [RTA_DST]      = { 1, 1, sizeof(ip4_addr) },
                    330:   [RTA_OIF]      = { 1, 1, sizeof(u32) },
                    331:   [RTA_GATEWAY]          = { 1, 1, sizeof(ip4_addr) },
                    332:   [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
                    333:   [RTA_PREFSRC]          = { 1, 1, sizeof(ip4_addr) },
                    334:   [RTA_METRICS]          = { 1, 0, 0 },
                    335:   [RTA_MULTIPATH] = { 1, 0, 0 },
                    336:   [RTA_FLOW]     = { 1, 1, sizeof(u32) },
                    337:   [RTA_TABLE]    = { 1, 1, sizeof(u32) },
                    338: };
                    339: #else
                    340: static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
                    341:   [RTA_DST]      = { 1, 1, sizeof(ip6_addr) },
                    342:   [RTA_IIF]      = { 1, 1, sizeof(u32) },
                    343:   [RTA_OIF]      = { 1, 1, sizeof(u32) },
                    344:   [RTA_GATEWAY]          = { 1, 1, sizeof(ip6_addr) },
                    345:   [RTA_PRIORITY]  = { 1, 1, sizeof(u32) },
                    346:   [RTA_PREFSRC]          = { 1, 1, sizeof(ip6_addr) },
                    347:   [RTA_METRICS]          = { 1, 0, 0 },
                    348:   [RTA_FLOW]     = { 1, 1, sizeof(u32) },
                    349:   [RTA_TABLE]    = { 1, 1, sizeof(u32) },
                    350: };
                    351: #endif
                    352: 
                    353: 
                    354: static int
                    355: nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
                    356: {
                    357:   int max = ksize / sizeof(struct rtattr *);
                    358:   bzero(k, ksize);
                    359: 
                    360:   for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
                    361:     {
                    362:       if ((a->rta_type >= max) || !want[a->rta_type].defined)
                    363:        continue;
                    364: 
                    365:       if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
                    366:        {
                    367:          log(L_ERR "nl_parse_attrs: Malformed message received");
                    368:          return 0;
                    369:        }
                    370: 
                    371:       k[a->rta_type] = a;
                    372:     }
                    373: 
                    374:   if (nl_attr_len)
                    375:     {
                    376:       log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
                    377:       return 0;
                    378:     }
                    379: 
                    380:   return 1;
                    381: }
                    382: 
                    383: static inline u32 rta_get_u32(struct rtattr *a)
                    384: { return *(u32 *) RTA_DATA(a); }
                    385: 
                    386: static inline ip4_addr rta_get_ip4(struct rtattr *a)
                    387: { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
                    388: 
                    389: static inline ip6_addr rta_get_ip6(struct rtattr *a)
                    390: { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
                    391: 
                    392: 
                    393: struct rtattr *
                    394: nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
                    395: {
                    396:   uint pos = NLMSG_ALIGN(h->nlmsg_len);
                    397:   uint len = RTA_LENGTH(dlen);
                    398: 
                    399:   if (pos + len > bufsize)
                    400:     bug("nl_add_attr: packet buffer overflow");
                    401: 
                    402:   struct rtattr *a = (struct rtattr *)((char *)h + pos);
                    403:   a->rta_type = code;
                    404:   a->rta_len = len;
                    405:   h->nlmsg_len = pos + len;
                    406: 
                    407:   if (dlen > 0)
                    408:     memcpy(RTA_DATA(a), data, dlen);
                    409: 
                    410:   return a;
                    411: }
                    412: 
                    413: static inline void
                    414: nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data)
                    415: {
                    416:   nl_add_attr(h, bufsize, code, &data, 4);
                    417: }
                    418: 
                    419: static inline void
                    420: nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa)
                    421: {
                    422:   ipa_hton(ipa);
                    423:   nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa));
                    424: }
                    425: 
                    426: static inline struct rtattr *
                    427: nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
                    428: {
                    429:   return nl_add_attr(h, bufsize, code, NULL, 0);
                    430: }
                    431: 
                    432: static inline void
                    433: nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
                    434: {
                    435:   a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
                    436: }
                    437: 
                    438: static inline struct rtnexthop *
                    439: nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
                    440: {
                    441:   uint pos = NLMSG_ALIGN(h->nlmsg_len);
                    442:   uint len = RTNH_LENGTH(0);
                    443: 
                    444:   if (pos + len > bufsize)
                    445:     bug("nl_open_nexthop: packet buffer overflow");
                    446: 
                    447:   h->nlmsg_len = pos + len;
                    448: 
                    449:   return (void *)h + pos;
                    450: }
                    451: 
                    452: static inline void
                    453: nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
                    454: {
                    455:   nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
                    456: }
                    457: 
                    458: static void
                    459: nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
                    460: {
                    461:   struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
                    462: 
                    463:   for (; nh; nh = nh->next)
                    464:   {
                    465:     struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
                    466: 
                    467:     rtnh->rtnh_flags = 0;
                    468:     rtnh->rtnh_hops = nh->weight;
                    469:     rtnh->rtnh_ifindex = nh->iface->index;
                    470: 
                    471:     nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
                    472: 
                    473:     nl_close_nexthop(h, rtnh);
                    474:   }
                    475: 
                    476:   nl_close_attr(h, a);
                    477: }
                    478: 
                    479: static struct mpnh *
                    480: nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
                    481: {
                    482:   /* Temporary buffer for multicast nexthops */
                    483:   static struct mpnh *nh_buffer;
                    484:   static int nh_buf_size;      /* in number of structures */
                    485:   static int nh_buf_used;
                    486: 
                    487:   struct rtattr *a[BIRD_RTA_MAX];
                    488:   struct rtnexthop *nh = RTA_DATA(ra);
                    489:   struct mpnh *rv, *first, **last;
                    490:   unsigned len = RTA_PAYLOAD(ra);
                    491: 
                    492:   first = NULL;
                    493:   last = &first;
                    494:   nh_buf_used = 0;
                    495: 
                    496:   while (len)
                    497:     {
                    498:       /* Use RTNH_OK(nh,len) ?? */
                    499:       if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
                    500:        return NULL;
                    501: 
                    502:       if (nh_buf_used == nh_buf_size)
                    503:       {
                    504:        nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
                    505:        nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh));
                    506:       }
                    507:       *last = rv = nh_buffer + nh_buf_used++;
                    508:       rv->next = NULL;
                    509:       last = &(rv->next);
                    510: 
                    511:       rv->weight = nh->rtnh_hops;
                    512:       rv->iface = if_find_by_index(nh->rtnh_ifindex);
                    513:       if (!rv->iface)
                    514:        return NULL;
                    515: 
                    516:       /* Nonexistent RTNH_PAYLOAD ?? */
                    517:       nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
                    518:       nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a));
                    519:       if (a[RTA_GATEWAY])
                    520:        {
                    521:          memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ip_addr));
                    522:          ipa_ntoh(rv->gw);
                    523: 
                    524:          neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
                    525:                                     (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
                    526:          if (!ng || (ng->scope == SCOPE_HOST))
                    527:            return NULL;
                    528:        }
                    529:       else
                    530:        return NULL;
                    531: 
                    532:       len -= NLMSG_ALIGN(nh->rtnh_len);
                    533:       nh = RTNH_NEXT(nh);
                    534:     }
                    535: 
                    536:   return first;
                    537: }
                    538: 
                    539: static void
                    540: nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
                    541: {
                    542:   struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
                    543:   int t;
                    544: 
                    545:   for (t = 1; t < max; t++)
                    546:     if (metrics[0] & (1 << t))
                    547:       nl_add_attr_u32(h, bufsize, t, metrics[t]);
                    548: 
                    549:   nl_close_attr(h, a);
                    550: }
                    551: 
                    552: static int
                    553: nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
                    554: {
                    555:   struct rtattr *a = RTA_DATA(hdr);
                    556:   int len = RTA_PAYLOAD(hdr);
                    557: 
                    558:   metrics[0] = 0;
                    559:   for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
                    560:   {
                    561:     if (a->rta_type == RTA_UNSPEC)
                    562:       continue;
                    563: 
                    564:     if (a->rta_type >= max)
                    565:       continue;
                    566: 
                    567:     if (RTA_PAYLOAD(a) != 4)
                    568:       return -1;
                    569: 
                    570:     metrics[0] |= 1 << a->rta_type;
                    571:     metrics[a->rta_type] = rta_get_u32(a);
                    572:   }
                    573: 
                    574:   if (len > 0)
                    575:     return -1;
                    576: 
                    577:   return 0;
                    578: }
                    579: 
                    580: 
                    581: /*
                    582:  *     Scanning of interfaces
                    583:  */
                    584: 
                    585: static void
                    586: nl_parse_link(struct nlmsghdr *h, int scan)
                    587: {
                    588:   struct ifinfomsg *i;
                    589:   struct rtattr *a[BIRD_IFLA_MAX];
                    590:   int new = h->nlmsg_type == RTM_NEWLINK;
                    591:   struct iface f = {};
                    592:   struct iface *ifi;
                    593:   char *name;
                    594:   u32 mtu;
                    595:   uint fl;
                    596: 
                    597:   if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
                    598:     return;
                    599:   if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
                    600:     {
                    601:       /*
                    602:        * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
                    603:        * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
                    604:        * We simply ignore all such messages with IFLA_WIRELESS without notice.
                    605:        */
                    606: 
                    607:       if (a[IFLA_WIRELESS])
                    608:        return;
                    609: 
                    610:       log(L_ERR "KIF: Malformed message received");
                    611:       return;
                    612:     }
                    613: 
                    614:   name = RTA_DATA(a[IFLA_IFNAME]);
                    615:   mtu = rta_get_u32(a[IFLA_MTU]);
                    616: 
                    617:   ifi = if_find_by_index(i->ifi_index);
                    618:   if (!new)
                    619:     {
                    620:       DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
                    621:       if (!ifi)
                    622:        return;
                    623: 
                    624:       if_delete(ifi);
                    625:     }
                    626:   else
                    627:     {
                    628:       DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
                    629:       if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
                    630:        if_delete(ifi);
                    631: 
                    632:       strncpy(f.name, name, sizeof(f.name)-1);
                    633:       f.index = i->ifi_index;
                    634:       f.mtu = mtu;
                    635: 
                    636:       fl = i->ifi_flags;
                    637:       if (fl & IFF_UP)
                    638:        f.flags |= IF_ADMIN_UP;
                    639:       if (fl & IFF_LOWER_UP)
                    640:        f.flags |= IF_LINK_UP;
                    641:       if (fl & IFF_LOOPBACK)           /* Loopback */
                    642:        f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
                    643:       else if (fl & IFF_POINTOPOINT)   /* PtP */
                    644:        f.flags |= IF_MULTICAST;
                    645:       else if (fl & IFF_BROADCAST)     /* Broadcast */
                    646:        f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
                    647:       else
                    648:        f.flags |= IF_MULTIACCESS;      /* NBMA */
                    649: 
                    650:       if (fl & IFF_MULTICAST)
                    651:        f.flags |= IF_MULTICAST;
                    652: 
                    653:       ifi = if_update(&f);
                    654: 
                    655:       if (!scan)
                    656:        if_end_partial_update(ifi);
                    657:     }
                    658: }
                    659: 
                    660: static void
                    661: nl_parse_addr(struct nlmsghdr *h, int scan)
                    662: {
                    663:   struct ifaddrmsg *i;
                    664:   struct rtattr *a[BIRD_IFA_MAX];
                    665:   int new = h->nlmsg_type == RTM_NEWADDR;
                    666:   struct ifa ifa;
                    667:   struct iface *ifi;
                    668:   int scope;
                    669:   u32 ifa_flags;
                    670: 
                    671:   if (!(i = nl_checkin(h, sizeof(*i))))
                    672:     return;
                    673: 
                    674:   switch (i->ifa_family)
                    675:     {
                    676: #ifndef IPV6
                    677:       case AF_INET:
                    678:        if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
                    679:          return;
                    680:        if (!a[IFA_LOCAL])
                    681:          {
                    682:            log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
                    683:            return;
                    684:          }
                    685:        break;
                    686: #else
                    687:       case AF_INET6:
                    688:        if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
                    689:          return;
                    690:        break;
                    691: #endif
                    692:       default:
                    693:        return;
                    694:     }
                    695: 
                    696:   if (!a[IFA_ADDRESS])
                    697:     {
                    698:       log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
                    699:       return;
                    700:     }
                    701: 
                    702:   if (a[IFA_FLAGS])
                    703:     ifa_flags = rta_get_u32(a[IFA_FLAGS]);
                    704:   else
                    705:     ifa_flags = i->ifa_flags;
                    706: 
                    707:   ifi = if_find_by_index(i->ifa_index);
                    708:   if (!ifi)
                    709:     {
                    710:       log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
                    711:       return;
                    712:     }
                    713: 
                    714:   bzero(&ifa, sizeof(ifa));
                    715:   ifa.iface = ifi;
                    716:   if (ifa_flags & IFA_F_SECONDARY)
                    717:     ifa.flags |= IA_SECONDARY;
                    718: 
                    719: #ifdef IPV6
                    720:   /* Ignore tentative addresses silently */
                    721:   if (ifa_flags & IFA_F_TENTATIVE)
                    722:     return;
                    723: #endif
                    724: 
                    725:   /* IFA_LOCAL can be unset for IPv6 interfaces */
                    726:   memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
                    727:   ipa_ntoh(ifa.ip);
                    728:   ifa.pxlen = i->ifa_prefixlen;
                    729:   if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
                    730:     {
                    731:       log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
                    732:       new = 0;
                    733:     }
                    734:   if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
                    735:     {
                    736:       ip_addr addr;
                    737:       memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
                    738:       ipa_ntoh(addr);
                    739:       ifa.prefix = ifa.brd = addr;
                    740: 
                    741:       /* It is either a host address or a peer address */
                    742:       if (ipa_equal(ifa.ip, addr))
                    743:        ifa.flags |= IA_HOST;
                    744:       else
                    745:        {
                    746:          ifa.flags |= IA_PEER;
                    747:          ifa.opposite = addr;
                    748:        }
                    749:     }
                    750:   else
                    751:     {
                    752:       ip_addr netmask = ipa_mkmask(ifa.pxlen);
                    753:       ifa.prefix = ipa_and(ifa.ip, netmask);
                    754:       ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
                    755:       if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
                    756:        ifa.opposite = ipa_opposite_m1(ifa.ip);
                    757: 
                    758: #ifndef IPV6
                    759:       if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
                    760:        ifa.opposite = ipa_opposite_m2(ifa.ip);
                    761: 
                    762:       if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
                    763:        {
                    764:          ip_addr xbrd;
                    765:          memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
                    766:          ipa_ntoh(xbrd);
                    767:          if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
                    768:            ifa.brd = xbrd;
                    769:          else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
                    770:            log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
                    771:        }
                    772: #endif
                    773:     }
                    774: 
                    775:   scope = ipa_classify(ifa.ip);
                    776:   if (scope < 0)
                    777:     {
                    778:       log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
                    779:       return;
                    780:     }
                    781:   ifa.scope = scope & IADDR_SCOPE_MASK;
                    782: 
                    783:   DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
                    784:       ifi->index, ifi->name,
                    785:       new ? "added" : "removed",
                    786:       ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);
                    787: 
                    788:   if (new)
                    789:     ifa_update(&ifa);
                    790:   else
                    791:     ifa_delete(&ifa);
                    792: 
                    793:   if (!scan)
                    794:     if_end_partial_update(ifi);
                    795: }
                    796: 
                    797: void
                    798: kif_do_scan(struct kif_proto *p UNUSED)
                    799: {
                    800:   struct nlmsghdr *h;
                    801: 
                    802:   if_start_update();
                    803: 
                    804:   nl_request_dump(AF_UNSPEC, RTM_GETLINK);
                    805:   while (h = nl_get_scan())
                    806:     if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
                    807:       nl_parse_link(h, 1);
                    808:     else
                    809:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
                    810: 
                    811:   nl_request_dump(BIRD_AF, RTM_GETADDR);
                    812:   while (h = nl_get_scan())
                    813:     if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
                    814:       nl_parse_addr(h, 1);
                    815:     else
                    816:       log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
                    817: 
                    818:   if_end_update();
                    819: }
                    820: 
                    821: /*
                    822:  *     Routes
                    823:  */
                    824: 
                    825: static inline u32
                    826: krt_table_id(struct krt_proto *p)
                    827: {
                    828:   return KRT_CF->sys.table_id;
                    829: }
                    830: 
                    /*
                     * Hash of krt protocol instances. nl_parse_route() looks a protocol up
                     * in it with HASH_FIND(nl_table_map, RTH, table), i.e. by table id.
                     *
                     * The RTH_* macros below are the per-table parameters picked up by the
                     * HASH_* macros:
                     *   RTH_FN     - hash function applied to a key
                     *   RTH_EQ     - key comparison
                     *   RTH_KEY    - key of a node (its table id via krt_table_id())
                     *   RTH_NEXT   - chaining link of a node
                     *   RTH_REHASH - name used for the rehash function
                     *   RTH_PARAMS - rehash parameters; NOTE(review): their exact meaning is
                     *                defined by the hash implementation, not visible here
                     *
                     * NOTE(review): RTH_EQ and RTH_NEXT expand without parentheses, so they
                     * rely on the HASH_* macros wrapping them safely - confirm before reuse.
                     */
                    831: static HASH(struct krt_proto) nl_table_map;
                    832: 
                    833: #define RTH_FN(k)      u32_hash(k)
                    834: #define RTH_EQ(k1,k2)  k1 == k2
                    835: #define RTH_KEY(p)     krt_table_id(p)
                    836: #define RTH_NEXT(p)    p->sys.hash_next
                    837: 
                    838: #define RTH_REHASH             rth_rehash
                    839: #define RTH_PARAMS             /8, *2, 2, 2, 6, 20
                    840: 
                    841: HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
                    842: 
                    843: int
                    844: krt_capable(rte *e)
                    845: {
                    846:   rta *a = e->attrs;
                    847: 
                    848:   if (a->cast != RTC_UNICAST)
                    849:     return 0;
                    850: 
                    851:   switch (a->dest)
                    852:     {
                    853:     case RTD_ROUTER:
                    854:     case RTD_DEVICE:
                    855:       if (a->iface == NULL)
                    856:        return 0;
                    857:     case RTD_BLACKHOLE:
                    858:     case RTD_UNREACHABLE:
                    859:     case RTD_PROHIBIT:
                    860:     case RTD_MULTIPATH:
                    861:       break;
                    862:     default:
                    863:       return 0;
                    864:     }
                    865:   return 1;
                    866: }
                    867: 
                    868: static inline int
                    869: nh_bufsize(struct mpnh *nh)
                    870: {
                    871:   int rv = 0;
                    872:   for (; nh != NULL; nh = nh->next)
                    873:     rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
                    874:   return rv;
                    875: }
                    876: 
                    877: static int
                    878: nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface)
                    879: {
                    880:   eattr *ea;
                    881:   net *net = e->net;
                    882:   rta *a = e->attrs;
                    883:   u32 priority = 0;
                    884: 
                    885:   struct {
                    886:     struct nlmsghdr h;
                    887:     struct rtmsg r;
                    888:     char buf[128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops)];
                    889:   } r;
                    890: 
                    891:   DBG("nl_send_route(%I/%d,op=%x)\n", net->n.prefix, net->n.pxlen, op);
                    892: 
                    893:   bzero(&r.h, sizeof(r.h));
                    894:   bzero(&r.r, sizeof(r.r));
                    895:   r.h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
                    896:   r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
                    897:   r.h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
                    898: 
                    899:   r.r.rtm_family = BIRD_AF;
                    900:   r.r.rtm_dst_len = net->n.pxlen;
                    901:   r.r.rtm_protocol = RTPROT_BIRD;
                    902:   r.r.rtm_scope = RT_SCOPE_NOWHERE;
                    903:   nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
                    904: 
                    905:   /*
                    906:    * Strange behavior for RTM_DELROUTE:
                    907:    * 1) rtm_family is ignored in IPv6, works for IPv4
                    908:    * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
                    909:    * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
                    910:    */
                    911: 
                    912:   if (krt_table_id(p) < 256)
                    913:     r.r.rtm_table = krt_table_id(p);
                    914:   else
                    915:     nl_add_attr_u32(&r.h, sizeof(r), RTA_TABLE, krt_table_id(p));
                    916: 
                    917:   if (a->source == RTS_DUMMY)
                    918:     priority = e->u.krt.metric;
                    919:   else if (KRT_CF->sys.metric)
                    920:     priority = KRT_CF->sys.metric;
                    921:   else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
                    922:     priority = ea->u.data;
                    923: 
                    924:   if (priority)
                    925:     nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, priority);
                    926: 
                    927:   /* For route delete, we do not specify remaining route attributes */
                    928:   if (op == NL_OP_DELETE)
                    929:     goto dest;
                    930: 
                    931:   /* Default scope is LINK for device routes, UNIVERSE otherwise */
                    932:   if (ea = ea_find(eattrs, EA_KRT_SCOPE))
                    933:     r.r.rtm_scope = ea->u.data;
                    934:   else
                    935:     r.r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
                    936: 
                    937:   if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
                    938:     nl_add_attr_ipa(&r.h, sizeof(r), RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
                    939: 
                    940:   if (ea = ea_find(eattrs, EA_KRT_REALM))
                    941:     nl_add_attr_u32(&r.h, sizeof(r), RTA_FLOW, ea->u.data);
                    942: 
                    943: 
                    944:   u32 metrics[KRT_METRICS_MAX];
                    945:   metrics[0] = 0;
                    946: 
                    947:   struct ea_walk_state ews = { .eattrs = eattrs };
                    948:   while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
                    949:   {
                    950:     int id = ea->id - EA_KRT_METRICS;
                    951:     metrics[0] |= 1 << id;
                    952:     metrics[id] = ea->u.data;
                    953:   }
                    954: 
                    955:   if (metrics[0])
                    956:     nl_add_metrics(&r.h, sizeof(r), metrics, KRT_METRICS_MAX);
                    957: 
                    958: 
                    959: dest:
                    960:   /* a->iface != NULL checked in krt_capable() for router and device routes */
                    961:   switch (dest)
                    962:     {
                    963:     case RTD_ROUTER:
                    964:       r.r.rtm_type = RTN_UNICAST;
                    965:       nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index);
                    966:       nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, gw);
                    967:       break;
                    968:     case RTD_DEVICE:
                    969:       r.r.rtm_type = RTN_UNICAST;
                    970:       nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index);
                    971:       break;
                    972:     case RTD_BLACKHOLE:
                    973:       r.r.rtm_type = RTN_BLACKHOLE;
                    974:       break;
                    975:     case RTD_UNREACHABLE:
                    976:       r.r.rtm_type = RTN_UNREACHABLE;
                    977:       break;
                    978:     case RTD_PROHIBIT:
                    979:       r.r.rtm_type = RTN_PROHIBIT;
                    980:       break;
                    981:     case RTD_MULTIPATH:
                    982:       r.r.rtm_type = RTN_UNICAST;
                    983:       nl_add_multipath(&r.h, sizeof(r), a->nexthops);
                    984:       break;
                    985:     case RTD_NONE:
                    986:       break;
                    987:     default:
                    988:       bug("krt_capable inconsistent with nl_send_route");
                    989:     }
                    990: 
                    991:   /* Ignore missing for DELETE */
                    992:   return nl_exchange(&r.h, (op == NL_OP_DELETE));
                    993: }
                    994: 
                    995: static inline int
                    996: nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
                    997: {
                    998:   rta *a = e->attrs;
                    999:   int err = 0;
                   1000: 
                   1001:   if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH))
                   1002:   {
                   1003:     struct mpnh *nh = a->nexthops;
                   1004: 
                   1005:     err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface);
                   1006:     if (err < 0)
                   1007:       return err;
                   1008: 
                   1009:     for (nh = nh->next; nh; nh = nh->next)
                   1010:       err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface);
                   1011: 
                   1012:     return err;
                   1013:   }
                   1014: 
                   1015:   return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface);
                   1016: }
                   1017: 
                   1018: static inline int
                   1019: nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
                   1020: {
                   1021:   int err = 0;
                   1022: 
                   1023:   /* For IPv6, we just repeatedly request DELETE until we get error */
                   1024:   do
                   1025:     err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL);
                   1026:   while (krt_ecmp6(p) && !err);
                   1027: 
                   1028:   return err;
                   1029: }
                   1030: 
                   1031: void
                   1032: krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
                   1033: {
                   1034:   int err = 0;
                   1035: 
                   1036:   /*
                   1037:    * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
                   1038:    *
                   1039:    * 1) Does not check for matching rtm_protocol
                   1040:    * 2) Has broken semantics for IPv6 ECMP
                   1041:    * 3) Crashes some kernel version when used for IPv6 ECMP
                   1042:    *
                   1043:    * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
                   1044:    * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
                   1045:    */
                   1046: 
                   1047:   if (old)
                   1048:     nl_delete_rte(p, old, eattrs);
                   1049: 
                   1050:   if (new)
                   1051:     err = nl_add_rte(p, new, eattrs);
                   1052: 
                   1053:   if (err < 0)
                   1054:     n->n.flags |= KRF_SYNC_ERROR;
                   1055:   else
                   1056:     n->n.flags &= ~KRF_SYNC_ERROR;
                   1057: }
                   1058: 
                   1059: 
                   1060: static inline struct mpnh *
                   1061: nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
                   1062: {
                   1063:   struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh));
                   1064: 
                   1065:   nh->gw = gw;
                   1066:   nh->iface = iface;
                   1067:   nh->next = NULL;
                   1068:   nh->weight = weight;
                   1069: 
                   1070:   return nh;
                   1071: }
                   1072: 
                   1073: static int
                   1074: nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
                   1075: {
                   1076:   /* Route merging must be active */
                   1077:   if (!s->merge)
                   1078:     return 0;
                   1079: 
                   1080:   /* Saved and new route must have same network, proto/table, and priority */
                   1081:   if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
                   1082:     return 0;
                   1083: 
                   1084:   /* Both must be regular unicast routes */
                   1085:   if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
                   1086:     return 0;
                   1087: 
                   1088:   return 1;
                   1089: }
                   1090: 
                   1091: static void
                   1092: nl_announce_route(struct nl_parse_state *s)
                   1093: {
                   1094:   rte *e = rte_get_temp(s->attrs);
                   1095:   e->net = s->net;
                   1096:   e->u.krt.src = s->krt_src;
                   1097:   e->u.krt.proto = s->krt_proto;
                   1098:   e->u.krt.seen = 0;
                   1099:   e->u.krt.best = 0;
                   1100:   e->u.krt.metric = s->krt_metric;
                   1101: 
                   1102:   if (s->scan)
                   1103:     krt_got_route(s->proto, e);
                   1104:   else
                   1105:     krt_got_route_async(s->proto, e, s->new);
                   1106: 
                   1107:   s->net = NULL;
                   1108:   s->attrs = NULL;
                   1109:   s->proto = NULL;
                   1110:   lp_flush(s->pool);
                   1111: }
                   1112: 
                   1113: static inline void
                   1114: nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
                   1115: {
                   1116:   memset(s, 0, sizeof (struct nl_parse_state));
                   1117:   s->pool = nl_linpool;
                   1118:   s->scan = scan;
                   1119:   s->merge = merge;
                   1120: }
                   1121: 
                   1122: static inline void
                   1123: nl_parse_end(struct nl_parse_state *s)
                   1124: {
                   1125:   if (s->net)
                   1126:     nl_announce_route(s);
                   1127: }
                   1128: 
                   1129: 
                   /*
                    * SKIP(fmt, args...) - emit a DBG() message prefixed with
                    * "KRT: Ignoring route - " and return from the enclosing function.
                    * The expansion contains a bare "return;", so it is only usable inside
                    * functions returning void.
                    */
                   1130: #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
                   1131: 
                   1132: static void
                   1133: nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
                   1134: {
                   1135:   struct krt_proto *p;
                   1136:   struct rtmsg *i;
                   1137:   struct rtattr *a[BIRD_RTA_MAX];
                   1138:   int new = h->nlmsg_type == RTM_NEWROUTE;
                   1139: 
                   1140:   ip_addr dst = IPA_NONE;
                   1141:   u32 oif = ~0;
                   1142:   u32 table;
                   1143:   u32 priority = 0;
                   1144:   u32 def_scope = RT_SCOPE_UNIVERSE;
                   1145:   int src;
                   1146: 
                   1147:   if (!(i = nl_checkin(h, sizeof(*i))))
                   1148:     return;
                   1149: 
                   1150:   switch (i->rtm_family)
                   1151:     {
                   1152: #ifndef IPV6
                   1153:       case AF_INET:
                   1154:        if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
                   1155:          return;
                   1156:        break;
                   1157: #else
                   1158:       case AF_INET6:
                   1159:        if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
                   1160:          return;
                   1161:        break;
                   1162: #endif
                   1163:       default:
                   1164:        return;
                   1165:     }
                   1166: 
                   1167:   if (a[RTA_DST])
                   1168:     {
                   1169:       memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
                   1170:       ipa_ntoh(dst);
                   1171:     }
                   1172: 
                   1173:   if (a[RTA_OIF])
                   1174:     oif = rta_get_u32(a[RTA_OIF]);
                   1175: 
                   1176:   if (a[RTA_TABLE])
                   1177:     table = rta_get_u32(a[RTA_TABLE]);
                   1178:   else
                   1179:     table = i->rtm_table;
                   1180: 
                   1181:   p = HASH_FIND(nl_table_map, RTH, table); /* Do we know this table? */
                   1182:   DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, table, i->rtm_protocol, p ? p->p.name : "(none)");
                   1183:   if (!p)
                   1184:     SKIP("unknown table %d\n", table);
                   1185: 
                   1186: #ifdef IPV6
                   1187:   if (a[RTA_IIF])
                   1188:     SKIP("IIF set\n");
                   1189: #else
                   1190:   if (i->rtm_tos != 0)                 /* We don't support TOS */
                   1191:     SKIP("TOS %02x\n", i->rtm_tos);
                   1192: #endif
                   1193: 
                   1194:   if (s->scan && !new)
                   1195:     SKIP("RTM_DELROUTE in scan\n");
                   1196: 
                   1197:   if (a[RTA_PRIORITY])
                   1198:     priority = rta_get_u32(a[RTA_PRIORITY]);
                   1199: 
                   1200:   int c = ipa_classify_net(dst);
                   1201:   if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
                   1202:     SKIP("strange class/scope\n");
                   1203: 
                   1204:   switch (i->rtm_protocol)
                   1205:     {
                   1206:     case RTPROT_UNSPEC:
                   1207:       SKIP("proto unspec\n");
                   1208: 
                   1209:     case RTPROT_REDIRECT:
                   1210:       src = KRT_SRC_REDIRECT;
                   1211:       break;
                   1212: 
                   1213:     case RTPROT_KERNEL:
                   1214:       src = KRT_SRC_KERNEL;
                   1215:       return;
                   1216: 
                   1217:     case RTPROT_BIRD:
                   1218:       if (!s->scan)
                   1219:        SKIP("echo\n");
                   1220:       src = KRT_SRC_BIRD;
                   1221:       break;
                   1222: 
                   1223:     case RTPROT_BOOT:
                   1224:     default:
                   1225:       src = KRT_SRC_ALIEN;
                   1226:     }
                   1227: 
                   1228:   net *net = net_get(p->p.table, dst, i->rtm_dst_len);
                   1229: 
                   1230:   if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
                   1231:     nl_announce_route(s);
                   1232: 
                   1233:   rta *ra = lp_allocz(s->pool, sizeof(rta));
                   1234:   ra->src = p->p.main_source;
                   1235:   ra->source = RTS_INHERIT;
                   1236:   ra->scope = SCOPE_UNIVERSE;
                   1237:   ra->cast = RTC_UNICAST;
                   1238: 
                   1239:   switch (i->rtm_type)
                   1240:     {
                   1241:     case RTN_UNICAST:
                   1242: 
                   1243:       if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
                   1244:        {
                   1245:          ra->dest = RTD_MULTIPATH;
                   1246:          ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
                   1247:          if (!ra->nexthops)
                   1248:            {
                   1249:              log(L_ERR "KRT: Received strange multipath route %I/%d",
                   1250:                  net->n.prefix, net->n.pxlen);
                   1251:              return;
                   1252:            }
                   1253: 
                   1254:          break;
                   1255:        }
                   1256: 
                   1257:       ra->iface = if_find_by_index(oif);
                   1258:       if (!ra->iface)
                   1259:        {
                   1260:          log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
                   1261:              net->n.prefix, net->n.pxlen, oif);
                   1262:          return;
                   1263:        }
                   1264: 
                   1265:       if (a[RTA_GATEWAY])
                   1266:        {
                   1267:          neighbor *ng;
                   1268:          ra->dest = RTD_ROUTER;
                   1269:          memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));
                   1270:          ipa_ntoh(ra->gw);
                   1271: 
                   1272: #ifdef IPV6
                   1273:          /* Silently skip strange 6to4 routes */
                   1274:          if (ipa_in_net(ra->gw, IPA_NONE, 96))
                   1275:            return;
                   1276: #endif
                   1277: 
                   1278:          ng = neigh_find2(&p->p, &ra->gw, ra->iface,
                   1279:                           (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
                   1280:          if (!ng || (ng->scope == SCOPE_HOST))
                   1281:            {
                   1282:              log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
                   1283:                  net->n.prefix, net->n.pxlen, ra->gw);
                   1284:              return;
                   1285:            }
                   1286:        }
                   1287:       else
                   1288:        {
                   1289:          ra->dest = RTD_DEVICE;
                   1290:          def_scope = RT_SCOPE_LINK;
                   1291:        }
                   1292: 
                   1293:       break;
                   1294:     case RTN_BLACKHOLE:
                   1295:       ra->dest = RTD_BLACKHOLE;
                   1296:       break;
                   1297:     case RTN_UNREACHABLE:
                   1298:       ra->dest = RTD_UNREACHABLE;
                   1299:       break;
                   1300:     case RTN_PROHIBIT:
                   1301:       ra->dest = RTD_PROHIBIT;
                   1302:       break;
                   1303:     /* FIXME: What about RTN_THROW? */
                   1304:     default:
                   1305:       SKIP("type %d\n", i->rtm_type);
                   1306:       return;
                   1307:     }
                   1308: 
                   1309:   if (i->rtm_scope != def_scope)
                   1310:     {
                   1311:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
                   1312:       ea->next = ra->eattrs;
                   1313:       ra->eattrs = ea;
                   1314:       ea->flags = EALF_SORTED;
                   1315:       ea->count = 1;
                   1316:       ea->attrs[0].id = EA_KRT_SCOPE;
                   1317:       ea->attrs[0].flags = 0;
                   1318:       ea->attrs[0].type = EAF_TYPE_INT;
                   1319:       ea->attrs[0].u.data = i->rtm_scope;
                   1320:     }
                   1321: 
                   1322:   if (a[RTA_PREFSRC])
                   1323:     {
                   1324:       ip_addr ps;
                   1325:       memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
                   1326:       ipa_ntoh(ps);
                   1327: 
                   1328:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
                   1329:       ea->next = ra->eattrs;
                   1330:       ra->eattrs = ea;
                   1331:       ea->flags = EALF_SORTED;
                   1332:       ea->count = 1;
                   1333:       ea->attrs[0].id = EA_KRT_PREFSRC;
                   1334:       ea->attrs[0].flags = 0;
                   1335:       ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
                   1336:       ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
                   1337:       ea->attrs[0].u.ptr->length = sizeof(ps);
                   1338:       memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
                   1339:     }
                   1340: 
                   1341:   if (a[RTA_FLOW])
                   1342:     {
                   1343:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
                   1344:       ea->next = ra->eattrs;
                   1345:       ra->eattrs = ea;
                   1346:       ea->flags = EALF_SORTED;
                   1347:       ea->count = 1;
                   1348:       ea->attrs[0].id = EA_KRT_REALM;
                   1349:       ea->attrs[0].flags = 0;
                   1350:       ea->attrs[0].type = EAF_TYPE_INT;
                   1351:       ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
                   1352:     }
                   1353: 
                   1354:   if (a[RTA_METRICS])
                   1355:     {
                   1356:       u32 metrics[KRT_METRICS_MAX];
                   1357:       ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
                   1358:       int t, n = 0;
                   1359: 
                   1360:       if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
                   1361:         {
                   1362:          log(L_ERR "KRT: Received route %I/%d with strange RTA_METRICS attribute",
                   1363:              net->n.prefix, net->n.pxlen);
                   1364:          return;
                   1365:        }
                   1366: 
                   1367:       for (t = 1; t < KRT_METRICS_MAX; t++)
                   1368:        if (metrics[0] & (1 << t))
                   1369:          {
                   1370:            ea->attrs[n].id = EA_CODE(EAP_KRT, KRT_METRICS_OFFSET + t);
                   1371:            ea->attrs[n].flags = 0;
                   1372:            ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
                   1373:            ea->attrs[n].u.data = metrics[t];
                   1374:            n++;
                   1375:          }
                   1376: 
                   1377:       if (n > 0)
                   1378:         {
                   1379:          ea->next = ra->eattrs;
                   1380:          ea->flags = EALF_SORTED;
                   1381:          ea->count = n;
                   1382:          ra->eattrs = ea;
                   1383:        }
                   1384:     }
                   1385: 
                   1386:   /*
                   1387:    * Ideally, now we would send the received route to the rest of kernel code.
                   1388:    * But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it
                   1389:    * and merge next hops until the end of the sequence.
                   1390:    */
                   1391: 
                   1392:   if (!s->net)
                   1393:   {
                   1394:     /* Store the new route */
                   1395:     s->net = net;
                   1396:     s->attrs = ra;
                   1397:     s->proto = p;
                   1398:     s->new = new;
                   1399:     s->krt_src = src;
                   1400:     s->krt_type = i->rtm_type;
                   1401:     s->krt_proto = i->rtm_protocol;
                   1402:     s->krt_metric = priority;
                   1403:   }
                   1404:   else
                   1405:   {
                   1406:     /* Merge next hops with the stored route */
                   1407:     rta *a = s->attrs;
                   1408: 
                   1409:     if (a->dest != RTD_MULTIPATH)
                   1410:     {
                   1411:       a->dest = RTD_MULTIPATH;
                   1412:       a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0);
                   1413:     }
                   1414: 
                   1415:     mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0));
                   1416:   }
                   1417: }
                   1418: 
                   1419: void
                   1420: krt_do_scan(struct krt_proto *p UNUSED)        /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
                   1421: {
                   1422:   struct nlmsghdr *h;
                   1423:   struct nl_parse_state s;
                   1424: 
                   1425:   nl_parse_begin(&s, 1, krt_ecmp6(p));
                   1426: 
                   1427:   nl_request_dump(BIRD_AF, RTM_GETROUTE);
                   1428:   while (h = nl_get_scan())
                   1429:     if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
                   1430:       nl_parse_route(&s, h);
                   1431:     else
                   1432:       log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
                   1433: 
                   1434:   nl_parse_end(&s);
                   1435: }
                   1436: 
                   1437: /*
                   1438:  *     Asynchronous Netlink interface
                   1439:  */
                   1440: 
                   1441: static sock *nl_async_sk;              /* BIRD socket for asynchronous notifications */
                   1442: static byte *nl_async_rx_buffer;       /* Receive buffer */
                   1443: 
                   1444: static void
                   1445: nl_async_msg(struct nlmsghdr *h)
                   1446: {
                   1447:   struct nl_parse_state s;
                   1448: 
                   1449:   switch (h->nlmsg_type)
                   1450:     {
                   1451:     case RTM_NEWROUTE:
                   1452:     case RTM_DELROUTE:
                   1453:       DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
                   1454:       nl_parse_begin(&s, 0, 0);
                   1455:       nl_parse_route(&s, h);
                   1456:       nl_parse_end(&s);
                   1457:       break;
                   1458:     case RTM_NEWLINK:
                   1459:     case RTM_DELLINK:
                   1460:       DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
                   1461:       if (kif_proto)
                   1462:        nl_parse_link(h, 0);
                   1463:       break;
                   1464:     case RTM_NEWADDR:
                   1465:     case RTM_DELADDR:
                   1466:       DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
                   1467:       if (kif_proto)
                   1468:        nl_parse_addr(h, 0);
                   1469:       break;
                   1470:     default:
                   1471:       DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
                   1472:     }
                   1473: }
                   1474: 
                   1475: static int
                   1476: nl_async_hook(sock *sk, uint size UNUSED)
                   1477: {
                   1478:   struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
                   1479:   struct sockaddr_nl sa;
                   1480:   struct msghdr m = {
                   1481:     .msg_name = &sa,
                   1482:     .msg_namelen = sizeof(sa),
                   1483:     .msg_iov = &iov,
                   1484:     .msg_iovlen = 1,
                   1485:   };
                   1486:   struct nlmsghdr *h;
                   1487:   int x;
                   1488:   uint len;
                   1489: 
                   1490:   x = recvmsg(sk->fd, &m, 0);
                   1491:   if (x < 0)
                   1492:     {
                   1493:       if (errno == ENOBUFS)
                   1494:        {
                   1495:          /*
                   1496:           *  Netlink reports some packets have been thrown away.
                   1497:           *  One day we might react to it by asking for route table
                   1498:           *  scan in near future.
                   1499:           */
                   1500:          log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
                   1501:          return 1;     /* More data are likely to be ready */
                   1502:        }
                   1503:       else if (errno != EWOULDBLOCK)
                   1504:        log(L_ERR "Netlink recvmsg: %m");
                   1505:       return 0;
                   1506:     }
                   1507:   if (sa.nl_pid)               /* It isn't from the kernel */
                   1508:     {
                   1509:       DBG("Non-kernel packet\n");
                   1510:       return 1;
                   1511:     }
                   1512:   h = (void *) nl_async_rx_buffer;
                   1513:   len = x;
                   1514:   if (m.msg_flags & MSG_TRUNC)
                   1515:     {
                   1516:       log(L_WARN "Netlink got truncated asynchronous message");
                   1517:       return 1;
                   1518:     }
                   1519:   while (NLMSG_OK(h, len))
                   1520:     {
                   1521:       nl_async_msg(h);
                   1522:       h = NLMSG_NEXT(h, len);
                   1523:     }
                   1524:   if (len)
                   1525:     log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
                   1526:   return 1;
                   1527: }
                   1528: 
                   1529: static void
                   1530: nl_async_err_hook(sock *sk, int e UNUSED)
                   1531: {
                   1532:   nl_async_hook(sk, 0);
                   1533: }
                   1534: 
                   1535: static void
                   1536: nl_open_async(void)
                   1537: {
                   1538:   sock *sk;
                   1539:   struct sockaddr_nl sa;
                   1540:   int fd;
                   1541: 
                   1542:   if (nl_async_sk)
                   1543:     return;
                   1544: 
                   1545:   DBG("KRT: Opening async netlink socket\n");
                   1546: 
                   1547:   fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
                   1548:   if (fd < 0)
                   1549:     {
                   1550:       log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
                   1551:       return;
                   1552:     }
                   1553: 
                   1554:   bzero(&sa, sizeof(sa));
                   1555:   sa.nl_family = AF_NETLINK;
                   1556: #ifdef IPV6
                   1557:   sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
                   1558: #else
                   1559:   sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
                   1560: #endif
                   1561:   if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
                   1562:     {
                   1563:       log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
                   1564:       close(fd);
                   1565:       return;
                   1566:     }
                   1567: 
                   1568:   nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
                   1569: 
                   1570:   sk = nl_async_sk = sk_new(krt_pool);
                   1571:   sk->type = SK_MAGIC;
                   1572:   sk->rx_hook = nl_async_hook;
                   1573:   sk->err_hook = nl_async_err_hook;
                   1574:   sk->fd = fd;
                   1575:   if (sk_open(sk) < 0)
                   1576:     bug("Netlink: sk_open failed");
                   1577: }
                   1578: 
                   1579: 
                   1580: /*
                   1581:  *     Interface to the UNIX krt module
                   1582:  */
                   1583: 
                   1584: void
                   1585: krt_sys_io_init(void)
                   1586: {
                   1587:   nl_linpool = lp_new(krt_pool, 4080);
                   1588:   HASH_INIT(nl_table_map, krt_pool, 6);
                   1589: }
                   1590: 
                   1591: int
                   1592: krt_sys_start(struct krt_proto *p)
                   1593: {
                   1594:   struct krt_proto *old = HASH_FIND(nl_table_map, RTH, krt_table_id(p));
                   1595: 
                   1596:   if (old)
                   1597:     {
                   1598:       log(L_ERR "%s: Kernel table %u already registered by %s",
                   1599:          p->p.name, krt_table_id(p), old->p.name);
                   1600:       return 0;
                   1601:     }
                   1602: 
                   1603:   HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
                   1604: 
                   1605:   nl_open();
                   1606:   nl_open_async();
                   1607: 
                   1608:   return 1;
                   1609: }
                   1610: 
                   1611: void
                   1612: krt_sys_shutdown(struct krt_proto *p)
                   1613: {
                   1614:   HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
                   1615: }
                   1616: 
                   1617: int
                   1618: krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
                   1619: {
                   1620:   return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
                   1621: }
                   1622: 
                   1623: void
                   1624: krt_sys_init_config(struct krt_config *cf)
                   1625: {
                   1626:   cf->sys.table_id = RT_TABLE_MAIN;
                   1627:   cf->sys.metric = 0;
                   1628: }
                   1629: 
                   1630: void
                   1631: krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
                   1632: {
                   1633:   d->sys.table_id = s->sys.table_id;
                   1634:   d->sys.metric = s->sys.metric;
                   1635: }
                   1636: 
                   1637: static const char *krt_metrics_names[KRT_METRICS_MAX] = {
                   1638:   NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
                   1639:   "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
                   1640: };
                   1641: 
                   1642: static const char *krt_features_names[KRT_FEATURES_MAX] = {
                   1643:   "ecn", NULL, NULL, "allfrag"
                   1644: };
                   1645: 
                   1646: int
                   1647: krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
                   1648: {
                   1649:   switch (a->id)
                   1650:   {
                   1651:   case EA_KRT_PREFSRC:
                   1652:     bsprintf(buf, "prefsrc");
                   1653:     return GA_NAME;
                   1654: 
                   1655:   case EA_KRT_REALM:
                   1656:     bsprintf(buf, "realm");
                   1657:     return GA_NAME;
                   1658: 
                   1659:   case EA_KRT_SCOPE:
                   1660:     bsprintf(buf, "scope");
                   1661:     return GA_NAME;
                   1662: 
                   1663:   case EA_KRT_LOCK:
                   1664:     buf += bsprintf(buf, "lock:");
                   1665:     ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
                   1666:     return GA_FULL;
                   1667: 
                   1668:   case EA_KRT_FEATURES:
                   1669:     buf += bsprintf(buf, "features:");
                   1670:     ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
                   1671:     return GA_FULL;
                   1672: 
                   1673:   default:;
                   1674:     int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
                   1675:     if (id > 0 && id < KRT_METRICS_MAX)
                   1676:     {
                   1677:       bsprintf(buf, "%s", krt_metrics_names[id]);
                   1678:       return GA_NAME;
                   1679:     }
                   1680: 
                   1681:     return GA_UNKNOWN;
                   1682:   }
                   1683: }
                   1684: 
                   1685: 
                   1686: 
                   1687: void
                   1688: kif_sys_start(struct kif_proto *p UNUSED)
                   1689: {
                   1690:   nl_open();
                   1691:   nl_open_async();
                   1692: }
                   1693: 
                   1694: void
                   1695: kif_sys_shutdown(struct kif_proto *p UNUSED)
                   1696: {
                   1697: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>