Annotation of embedaddon/bird/sysdep/linux/netlink.c, revision 1.1.1.1
1.1 misho 1: /*
2: * BIRD -- Linux Netlink Interface
3: *
4: * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5: *
6: * Can be freely distributed and used under the terms of the GNU GPL.
7: */
8:
9: #include <stdio.h>
10: #include <unistd.h>
11: #include <fcntl.h>
12: #include <sys/socket.h>
13: #include <sys/uio.h>
14: #include <errno.h>
15:
16: #undef LOCAL_DEBUG
17:
18: #include "nest/bird.h"
19: #include "nest/route.h"
20: #include "nest/protocol.h"
21: #include "nest/iface.h"
22: #include "lib/timer.h"
23: #include "lib/unix.h"
24: #include "lib/krt.h"
25: #include "lib/socket.h"
26: #include "lib/string.h"
27: #include "lib/hash.h"
28: #include "conf/conf.h"
29:
30: #include <asm/types.h>
31: #include <linux/if.h>
32: #include <linux/netlink.h>
33: #include <linux/rtnetlink.h>
34:
35:
/*
 * Fallback values, used only when the system headers included above do not
 * already provide these names.
 */

#if !defined(MSG_TRUNC)		/* Hack: Several versions of glibc miss this one :( */
#define MSG_TRUNC 0x20
#endif

#if !defined(IFA_FLAGS)
#define IFA_FLAGS 8
#endif

#if !defined(IFF_LOWER_UP)
#define IFF_LOWER_UP 0x10000
#endif

#if !defined(RTA_TABLE)
#define RTA_TABLE 15
#endif
51:
52:
/*
 * krt_ecmp6(X) is a compile-time constant: 1 when built with IPV6 defined,
 * 0 otherwise. The argument is not evaluated.
 */
#if defined(IPV6)
#define krt_ecmp6(X) 1
#else
#define krt_ecmp6(X) 0
#endif
58:
59: /*
60: * Structure nl_parse_state keeps state of received route processing. Ideally,
61: * we could just independently parse received Netlink messages and immediately
62: * propagate received routes to the rest of BIRD, but Linux kernel represents
63: * and announces IPv6 ECMP routes not as one route with multiple next hops (like
64: * RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix.
65: *
66: * Therefore, BIRD keeps currently processed route in nl_parse_state structure
67: * and postpones its propagation until we expect it to be final; i.e., when
68: * non-matching route is received or when the scan ends. When another matching
69: * route is received, it is merged with the already processed route to form an
70: * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
71: * postponing is done in both cases (for simplicity). All IPv4 routes are just
72: * considered non-matching.
73: *
74: * This is ignored for asynchronous notifications (every notification is handled
75: * as a separate route). It is not an issue for our routes, as we ignore such
76: * notifications anyways. But importing alien IPv6 ECMP routes does not work
77: * properly.
78: */
79:
80: struct nl_parse_state
81: {
82: struct linpool *pool;
83: int scan;
84: int merge;
85:
86: net *net;
87: rta *attrs;
88: struct krt_proto *proto;
89: s8 new;
90: s8 krt_src;
91: u8 krt_type;
92: u8 krt_proto;
93: u32 krt_metric;
94: };
95:
96: /*
97: * Synchronous Netlink interface
98: */
99:
100: struct nl_sock
101: {
102: int fd;
103: u32 seq;
104: byte *rx_buffer; /* Receive buffer */
105: struct nlmsghdr *last_hdr; /* Recently received packet */
106: uint last_size;
107: };
108:
109: #define NL_RX_SIZE 8192
110:
111: #define NL_OP_DELETE 0
112: #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
113: #define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
114: #define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
115:
116: static linpool *nl_linpool;
117:
118: static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
119: static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
120:
121: static void
122: nl_open_sock(struct nl_sock *nl)
123: {
124: if (nl->fd < 0)
125: {
126: nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
127: if (nl->fd < 0)
128: die("Unable to open rtnetlink socket: %m");
129: nl->seq = now;
130: nl->rx_buffer = xmalloc(NL_RX_SIZE);
131: nl->last_hdr = NULL;
132: nl->last_size = 0;
133: }
134: }
135:
136: static void
137: nl_open(void)
138: {
139: nl_open_sock(&nl_scan);
140: nl_open_sock(&nl_req);
141: }
142:
143: static void
144: nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
145: {
146: struct sockaddr_nl sa;
147:
148: memset(&sa, 0, sizeof(sa));
149: sa.nl_family = AF_NETLINK;
150: nh->nlmsg_pid = 0;
151: nh->nlmsg_seq = ++(nl->seq);
152: if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
153: die("rtnetlink sendto: %m");
154: nl->last_hdr = NULL;
155: }
156:
157: static void
158: nl_request_dump(int af, int cmd)
159: {
160: struct {
161: struct nlmsghdr nh;
162: struct rtgenmsg g;
163: } req = {
164: .nh.nlmsg_type = cmd,
165: .nh.nlmsg_len = sizeof(req),
166: .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
167: .g.rtgen_family = af
168: };
169: nl_send(&nl_scan, &req.nh);
170: }
171:
172: static struct nlmsghdr *
173: nl_get_reply(struct nl_sock *nl)
174: {
175: for(;;)
176: {
177: if (!nl->last_hdr)
178: {
179: struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
180: struct sockaddr_nl sa;
181: struct msghdr m = {
182: .msg_name = &sa,
183: .msg_namelen = sizeof(sa),
184: .msg_iov = &iov,
185: .msg_iovlen = 1,
186: };
187: int x = recvmsg(nl->fd, &m, 0);
188: if (x < 0)
189: die("nl_get_reply: %m");
190: if (sa.nl_pid) /* It isn't from the kernel */
191: {
192: DBG("Non-kernel packet\n");
193: continue;
194: }
195: nl->last_size = x;
196: nl->last_hdr = (void *) nl->rx_buffer;
197: if (m.msg_flags & MSG_TRUNC)
198: bug("nl_get_reply: got truncated reply which should be impossible");
199: }
200: if (NLMSG_OK(nl->last_hdr, nl->last_size))
201: {
202: struct nlmsghdr *h = nl->last_hdr;
203: nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
204: if (h->nlmsg_seq != nl->seq)
205: {
206: log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
207: h->nlmsg_seq, nl->seq);
208: continue;
209: }
210: return h;
211: }
212: if (nl->last_size)
213: log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
214: nl->last_hdr = NULL;
215: }
216: }
217:
218: static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
219:
220: static int
221: nl_error(struct nlmsghdr *h, int ignore_esrch)
222: {
223: struct nlmsgerr *e;
224: int ec;
225:
226: if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
227: {
228: log(L_WARN "Netlink: Truncated error message received");
229: return ENOBUFS;
230: }
231: e = (struct nlmsgerr *) NLMSG_DATA(h);
232: ec = -e->error;
233: if (ec && !(ignore_esrch && (ec == ESRCH)))
234: log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
235: return ec;
236: }
237:
238: static struct nlmsghdr *
239: nl_get_scan(void)
240: {
241: struct nlmsghdr *h = nl_get_reply(&nl_scan);
242:
243: if (h->nlmsg_type == NLMSG_DONE)
244: return NULL;
245: if (h->nlmsg_type == NLMSG_ERROR)
246: {
247: nl_error(h, 0);
248: return NULL;
249: }
250: return h;
251: }
252:
253: static int
254: nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
255: {
256: struct nlmsghdr *h;
257:
258: nl_send(&nl_req, pkt);
259: for(;;)
260: {
261: h = nl_get_reply(&nl_req);
262: if (h->nlmsg_type == NLMSG_ERROR)
263: break;
264: log(L_WARN "nl_exchange: Unexpected reply received");
265: }
266: return nl_error(h, ignore_esrch) ? -1 : 0;
267: }
268:
269: /*
270: * Netlink attributes
271: */
272:
273: static int nl_attr_len;
274:
275: static void *
276: nl_checkin(struct nlmsghdr *h, int lsize)
277: {
278: nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
279: if (nl_attr_len < 0)
280: {
281: log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
282: return NULL;
283: }
284: return NLMSG_DATA(h);
285: }
286:
287: struct nl_want_attrs {
288: u8 defined:1;
289: u8 checksize:1;
290: u8 size;
291: };
292:
293:
294: #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
295:
296: static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
297: [IFLA_IFNAME] = { 1, 0, 0 },
298: [IFLA_MTU] = { 1, 1, sizeof(u32) },
299: [IFLA_WIRELESS] = { 1, 0, 0 },
300: };
301:
302:
303: #define BIRD_IFA_MAX (IFA_FLAGS+1)
304:
305: #ifndef IPV6
306: static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
307: [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
308: [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
309: [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
310: [IFA_FLAGS] = { 1, 1, sizeof(u32) },
311: };
312: #else
313: static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
314: [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
315: [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
316: [IFA_FLAGS] = { 1, 1, sizeof(u32) },
317: };
318: #endif
319:
320:
321: #define BIRD_RTA_MAX (RTA_TABLE+1)
322:
323: static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
324: [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
325: };
326:
327: #ifndef IPV6
328: static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
329: [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
330: [RTA_OIF] = { 1, 1, sizeof(u32) },
331: [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
332: [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
333: [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
334: [RTA_METRICS] = { 1, 0, 0 },
335: [RTA_MULTIPATH] = { 1, 0, 0 },
336: [RTA_FLOW] = { 1, 1, sizeof(u32) },
337: [RTA_TABLE] = { 1, 1, sizeof(u32) },
338: };
339: #else
340: static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
341: [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
342: [RTA_IIF] = { 1, 1, sizeof(u32) },
343: [RTA_OIF] = { 1, 1, sizeof(u32) },
344: [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
345: [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
346: [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
347: [RTA_METRICS] = { 1, 0, 0 },
348: [RTA_FLOW] = { 1, 1, sizeof(u32) },
349: [RTA_TABLE] = { 1, 1, sizeof(u32) },
350: };
351: #endif
352:
353:
354: static int
355: nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
356: {
357: int max = ksize / sizeof(struct rtattr *);
358: bzero(k, ksize);
359:
360: for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
361: {
362: if ((a->rta_type >= max) || !want[a->rta_type].defined)
363: continue;
364:
365: if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
366: {
367: log(L_ERR "nl_parse_attrs: Malformed message received");
368: return 0;
369: }
370:
371: k[a->rta_type] = a;
372: }
373:
374: if (nl_attr_len)
375: {
376: log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
377: return 0;
378: }
379:
380: return 1;
381: }
382:
383: static inline u32 rta_get_u32(struct rtattr *a)
384: { return *(u32 *) RTA_DATA(a); }
385:
386: static inline ip4_addr rta_get_ip4(struct rtattr *a)
387: { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
388:
389: static inline ip6_addr rta_get_ip6(struct rtattr *a)
390: { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
391:
392:
393: struct rtattr *
394: nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
395: {
396: uint pos = NLMSG_ALIGN(h->nlmsg_len);
397: uint len = RTA_LENGTH(dlen);
398:
399: if (pos + len > bufsize)
400: bug("nl_add_attr: packet buffer overflow");
401:
402: struct rtattr *a = (struct rtattr *)((char *)h + pos);
403: a->rta_type = code;
404: a->rta_len = len;
405: h->nlmsg_len = pos + len;
406:
407: if (dlen > 0)
408: memcpy(RTA_DATA(a), data, dlen);
409:
410: return a;
411: }
412:
413: static inline void
414: nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data)
415: {
416: nl_add_attr(h, bufsize, code, &data, 4);
417: }
418:
419: static inline void
420: nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa)
421: {
422: ipa_hton(ipa);
423: nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa));
424: }
425:
426: static inline struct rtattr *
427: nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
428: {
429: return nl_add_attr(h, bufsize, code, NULL, 0);
430: }
431:
432: static inline void
433: nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
434: {
435: a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
436: }
437:
438: static inline struct rtnexthop *
439: nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
440: {
441: uint pos = NLMSG_ALIGN(h->nlmsg_len);
442: uint len = RTNH_LENGTH(0);
443:
444: if (pos + len > bufsize)
445: bug("nl_open_nexthop: packet buffer overflow");
446:
447: h->nlmsg_len = pos + len;
448:
449: return (void *)h + pos;
450: }
451:
452: static inline void
453: nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
454: {
455: nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
456: }
457:
458: static void
459: nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
460: {
461: struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
462:
463: for (; nh; nh = nh->next)
464: {
465: struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
466:
467: rtnh->rtnh_flags = 0;
468: rtnh->rtnh_hops = nh->weight;
469: rtnh->rtnh_ifindex = nh->iface->index;
470:
471: nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
472:
473: nl_close_nexthop(h, rtnh);
474: }
475:
476: nl_close_attr(h, a);
477: }
478:
479: static struct mpnh *
480: nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
481: {
482: /* Temporary buffer for multicast nexthops */
483: static struct mpnh *nh_buffer;
484: static int nh_buf_size; /* in number of structures */
485: static int nh_buf_used;
486:
487: struct rtattr *a[BIRD_RTA_MAX];
488: struct rtnexthop *nh = RTA_DATA(ra);
489: struct mpnh *rv, *first, **last;
490: unsigned len = RTA_PAYLOAD(ra);
491:
492: first = NULL;
493: last = &first;
494: nh_buf_used = 0;
495:
496: while (len)
497: {
498: /* Use RTNH_OK(nh,len) ?? */
499: if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
500: return NULL;
501:
502: if (nh_buf_used == nh_buf_size)
503: {
504: nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
505: nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh));
506: }
507: *last = rv = nh_buffer + nh_buf_used++;
508: rv->next = NULL;
509: last = &(rv->next);
510:
511: rv->weight = nh->rtnh_hops;
512: rv->iface = if_find_by_index(nh->rtnh_ifindex);
513: if (!rv->iface)
514: return NULL;
515:
516: /* Nonexistent RTNH_PAYLOAD ?? */
517: nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
518: nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a));
519: if (a[RTA_GATEWAY])
520: {
521: memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ip_addr));
522: ipa_ntoh(rv->gw);
523:
524: neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
525: (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
526: if (!ng || (ng->scope == SCOPE_HOST))
527: return NULL;
528: }
529: else
530: return NULL;
531:
532: len -= NLMSG_ALIGN(nh->rtnh_len);
533: nh = RTNH_NEXT(nh);
534: }
535:
536: return first;
537: }
538:
539: static void
540: nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
541: {
542: struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
543: int t;
544:
545: for (t = 1; t < max; t++)
546: if (metrics[0] & (1 << t))
547: nl_add_attr_u32(h, bufsize, t, metrics[t]);
548:
549: nl_close_attr(h, a);
550: }
551:
552: static int
553: nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
554: {
555: struct rtattr *a = RTA_DATA(hdr);
556: int len = RTA_PAYLOAD(hdr);
557:
558: metrics[0] = 0;
559: for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
560: {
561: if (a->rta_type == RTA_UNSPEC)
562: continue;
563:
564: if (a->rta_type >= max)
565: continue;
566:
567: if (RTA_PAYLOAD(a) != 4)
568: return -1;
569:
570: metrics[0] |= 1 << a->rta_type;
571: metrics[a->rta_type] = rta_get_u32(a);
572: }
573:
574: if (len > 0)
575: return -1;
576:
577: return 0;
578: }
579:
580:
581: /*
582: * Scanning of interfaces
583: */
584:
585: static void
586: nl_parse_link(struct nlmsghdr *h, int scan)
587: {
588: struct ifinfomsg *i;
589: struct rtattr *a[BIRD_IFLA_MAX];
590: int new = h->nlmsg_type == RTM_NEWLINK;
591: struct iface f = {};
592: struct iface *ifi;
593: char *name;
594: u32 mtu;
595: uint fl;
596:
597: if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
598: return;
599: if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
600: {
601: /*
602: * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
603: * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
604: * We simply ignore all such messages with IFLA_WIRELESS without notice.
605: */
606:
607: if (a[IFLA_WIRELESS])
608: return;
609:
610: log(L_ERR "KIF: Malformed message received");
611: return;
612: }
613:
614: name = RTA_DATA(a[IFLA_IFNAME]);
615: mtu = rta_get_u32(a[IFLA_MTU]);
616:
617: ifi = if_find_by_index(i->ifi_index);
618: if (!new)
619: {
620: DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
621: if (!ifi)
622: return;
623:
624: if_delete(ifi);
625: }
626: else
627: {
628: DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
629: if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
630: if_delete(ifi);
631:
632: strncpy(f.name, name, sizeof(f.name)-1);
633: f.index = i->ifi_index;
634: f.mtu = mtu;
635:
636: fl = i->ifi_flags;
637: if (fl & IFF_UP)
638: f.flags |= IF_ADMIN_UP;
639: if (fl & IFF_LOWER_UP)
640: f.flags |= IF_LINK_UP;
641: if (fl & IFF_LOOPBACK) /* Loopback */
642: f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
643: else if (fl & IFF_POINTOPOINT) /* PtP */
644: f.flags |= IF_MULTICAST;
645: else if (fl & IFF_BROADCAST) /* Broadcast */
646: f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
647: else
648: f.flags |= IF_MULTIACCESS; /* NBMA */
649:
650: if (fl & IFF_MULTICAST)
651: f.flags |= IF_MULTICAST;
652:
653: ifi = if_update(&f);
654:
655: if (!scan)
656: if_end_partial_update(ifi);
657: }
658: }
659:
/*
 * Process one RTM_NEWADDR / RTM_DELADDR message @h.
 *
 * Messages of the address family this build does not handle are ignored.
 * A struct ifa is built from the message (address, prefix, broadcast,
 * opposite address, scope, flags) and passed to ifa_update() for a new
 * address or ifa_delete() otherwise. When @scan is zero the interface update
 * is finished at once with if_end_partial_update().
 */
660: static void
661: nl_parse_addr(struct nlmsghdr *h, int scan)
662: {
663: struct ifaddrmsg *i;
664: struct rtattr *a[BIRD_IFA_MAX];
665: int new = h->nlmsg_type == RTM_NEWADDR;
666: struct ifa ifa;
667: struct iface *ifi;
668: int scope;
669: u32 ifa_flags;
670:
671: if (!(i = nl_checkin(h, sizeof(*i))))
672: return;
673:
/* Parse attributes with the table matching the compiled-in address family */
674: switch (i->ifa_family)
675: {
676: #ifndef IPV6
677: case AF_INET:
678: if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
679: return;
680: if (!a[IFA_LOCAL])
681: {
682: log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
683: return;
684: }
685: break;
686: #else
687: case AF_INET6:
688: if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
689: return;
690: break;
691: #endif
692: default:
693: return;
694: }
695:
696: if (!a[IFA_ADDRESS])
697: {
698: log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
699: return;
700: }
701:
/* The IFA_FLAGS attribute, when present, takes precedence over the header field */
702: if (a[IFA_FLAGS])
703: ifa_flags = rta_get_u32(a[IFA_FLAGS]);
704: else
705: ifa_flags = i->ifa_flags;
706:
707: ifi = if_find_by_index(i->ifa_index);
708: if (!ifi)
709: {
710: log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
711: return;
712: }
713:
714: bzero(&ifa, sizeof(ifa));
715: ifa.iface = ifi;
716: if (ifa_flags & IFA_F_SECONDARY)
717: ifa.flags |= IA_SECONDARY;
718:
719: #ifdef IPV6
720: /* Ignore tentative addresses silently */
721: if (ifa_flags & IFA_F_TENTATIVE)
722: return;
723: #endif
724:
725: /* IFA_LOCAL can be unset for IPv6 interfaces */
726: memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
727: ipa_ntoh(ifa.ip);
728: ifa.pxlen = i->ifa_prefixlen;
729: if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
730: {
731: log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
732: new = 0; /* Processing continues, but the address is handled as a removal below */
733: }
734: if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
735: {
736: ip_addr addr;
737: memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
738: ipa_ntoh(addr);
739: ifa.prefix = ifa.brd = addr;
740:
741: /* It is either a host address or a peer address */
742: if (ipa_equal(ifa.ip, addr))
743: ifa.flags |= IA_HOST;
744: else
745: {
746: ifa.flags |= IA_PEER;
747: ifa.opposite = addr;
748: }
749: }
750: else
751: {
/* Shorter prefix: derive network and broadcast addresses from the mask */
752: ip_addr netmask = ipa_mkmask(ifa.pxlen);
753: ifa.prefix = ipa_and(ifa.ip, netmask);
754: ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
755: if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
756: ifa.opposite = ipa_opposite_m1(ifa.ip);
757:
758: #ifndef IPV6
759: if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
760: ifa.opposite = ipa_opposite_m2(ifa.ip);
761:
/* Accept the kernel-supplied broadcast address only if it equals the prefix or the computed broadcast */
762: if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
763: {
764: ip_addr xbrd;
765: memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
766: ipa_ntoh(xbrd);
767: if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
768: ifa.brd = xbrd;
769: else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
770: log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
771: }
772: #endif
773: }
774:
775: scope = ipa_classify(ifa.ip);
776: if (scope < 0)
777: {
778: log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
779: return;
780: }
781: ifa.scope = scope & IADDR_SCOPE_MASK;
782:
783: DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
784: ifi->index, ifi->name,
785: new ? "added" : "removed",
786: ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);
787:
788: if (new)
789: ifa_update(&ifa);
790: else
791: ifa_delete(&ifa);
792:
793: if (!scan)
794: if_end_partial_update(ifi);
795: }
796:
797: void
798: kif_do_scan(struct kif_proto *p UNUSED)
799: {
800: struct nlmsghdr *h;
801:
802: if_start_update();
803:
804: nl_request_dump(AF_UNSPEC, RTM_GETLINK);
805: while (h = nl_get_scan())
806: if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
807: nl_parse_link(h, 1);
808: else
809: log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
810:
811: nl_request_dump(BIRD_AF, RTM_GETADDR);
812: while (h = nl_get_scan())
813: if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
814: nl_parse_addr(h, 1);
815: else
816: log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
817:
818: if_end_update();
819: }
820:
821: /*
822: * Routes
823: */
824:
825: static inline u32
826: krt_table_id(struct krt_proto *p)
827: {
828: return KRT_CF->sys.table_id;
829: }
830:
/*
 * Hash table of krt protocol instances keyed by kernel table id; consulted by
 * nl_parse_route() via HASH_FIND(nl_table_map, RTH, table).
 */
831: static HASH(struct krt_proto) nl_table_map;
832:
/* Glue for the generic HASH_* macros: hash function, key equality, key of a node, chain link */
833: #define RTH_FN(k) u32_hash(k)
834: #define RTH_EQ(k1,k2) k1 == k2
835: #define RTH_KEY(p) krt_table_id(p)
836: #define RTH_NEXT(p) p->sys.hash_next
837:
/* NOTE(review): RTH_PARAMS looks like resize tuning for the HASH_* macros; the meaning of each value is not visible here, confirm in lib/hash.h */
838: #define RTH_REHASH rth_rehash
839: #define RTH_PARAMS /8, *2, 2, 2, 6, 20
840:
841: HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
842:
843: int
844: krt_capable(rte *e)
845: {
846: rta *a = e->attrs;
847:
848: if (a->cast != RTC_UNICAST)
849: return 0;
850:
851: switch (a->dest)
852: {
853: case RTD_ROUTER:
854: case RTD_DEVICE:
855: if (a->iface == NULL)
856: return 0;
857: case RTD_BLACKHOLE:
858: case RTD_UNREACHABLE:
859: case RTD_PROHIBIT:
860: case RTD_MULTIPATH:
861: break;
862: default:
863: return 0;
864: }
865: return 1;
866: }
867:
868: static inline int
869: nh_bufsize(struct mpnh *nh)
870: {
871: int rv = 0;
872: for (; nh != NULL; nh = nh->next)
873: rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
874: return rv;
875: }
876:
/*
 * Build a route request for route @e of protocol @p and send it with
 * nl_exchange().
 *
 * @op is one of the NL_OP_* values; NL_OP_DELETE (0) produces RTM_DELROUTE,
 * anything else RTM_NEWROUTE with @op as extra message flags. @dest is the
 * RTD_* destination type to encode; @gw and @iface are used for RTD_ROUTER,
 * @iface alone for RTD_DEVICE. Extended attributes are taken from @eattrs.
 *
 * Returns the result of nl_exchange(): 0 on success, -1 on error. For
 * deletion, ESRCH from the kernel is not logged.
 */
877: static int
878: nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface)
879: {
880: eattr *ea;
881: net *net = e->net;
882: rta *a = e->attrs;
883: u32 priority = 0;
884:
/* Message buffer; its variable-length tail is sized for the metrics and all next hops of the route */
885: struct {
886: struct nlmsghdr h;
887: struct rtmsg r;
888: char buf[128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops)];
889: } r;
890:
891: DBG("nl_send_route(%I/%d,op=%x)\n", net->n.prefix, net->n.pxlen, op);
892:
893: bzero(&r.h, sizeof(r.h));
894: bzero(&r.r, sizeof(r.r));
895: r.h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
896: r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
897: r.h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
898:
899: r.r.rtm_family = BIRD_AF;
900: r.r.rtm_dst_len = net->n.pxlen;
901: r.r.rtm_protocol = RTPROT_BIRD;
902: r.r.rtm_scope = RT_SCOPE_NOWHERE;
903: nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
904:
905: /*
906: * Strange behavior for RTM_DELROUTE:
907: * 1) rtm_family is ignored in IPv6, works for IPv4
908: * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
909: * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
910: */
911:
/* Table ids below 256 go into the rtm_table header field, larger ones into an RTA_TABLE attribute */
912: if (krt_table_id(p) < 256)
913: r.r.rtm_table = krt_table_id(p);
914: else
915: nl_add_attr_u32(&r.h, sizeof(r), RTA_TABLE, krt_table_id(p));
916:
/* Priority: the route's own metric for RTS_DUMMY routes, else the configured metric, else (not for delete) the EA_KRT_METRIC attribute */
917: if (a->source == RTS_DUMMY)
918: priority = e->u.krt.metric;
919: else if (KRT_CF->sys.metric)
920: priority = KRT_CF->sys.metric;
921: else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
922: priority = ea->u.data;
923:
924: if (priority)
925: nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, priority);
926:
927: /* For route delete, we do not specify remaining route attributes */
928: if (op == NL_OP_DELETE)
929: goto dest;
930:
931: /* Default scope is LINK for device routes, UNIVERSE otherwise */
932: if (ea = ea_find(eattrs, EA_KRT_SCOPE))
933: r.r.rtm_scope = ea->u.data;
934: else
935: r.r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
936:
937: if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
938: nl_add_attr_ipa(&r.h, sizeof(r), RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
939:
940: if (ea = ea_find(eattrs, EA_KRT_REALM))
941: nl_add_attr_u32(&r.h, sizeof(r), RTA_FLOW, ea->u.data);
942:
943:
/* Collect the metric attributes: metrics[0] is the bitmask of ids present, metrics[id] the values */
944: u32 metrics[KRT_METRICS_MAX];
945: metrics[0] = 0;
946:
947: struct ea_walk_state ews = { .eattrs = eattrs };
948: while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
949: {
950: int id = ea->id - EA_KRT_METRICS;
951: metrics[0] |= 1 << id;
952: metrics[id] = ea->u.data;
953: }
954:
955: if (metrics[0])
956: nl_add_metrics(&r.h, sizeof(r), metrics, KRT_METRICS_MAX);
957:
958:
/* The label below is unrelated to the 'dest' parameter (labels have their own namespace) */
959: dest:
960: /* a->iface != NULL checked in krt_capable() for router and device routes */
961: switch (dest)
962: {
963: case RTD_ROUTER:
964: r.r.rtm_type = RTN_UNICAST;
965: nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index);
966: nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, gw);
967: break;
968: case RTD_DEVICE:
969: r.r.rtm_type = RTN_UNICAST;
970: nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index);
971: break;
972: case RTD_BLACKHOLE:
973: r.r.rtm_type = RTN_BLACKHOLE;
974: break;
975: case RTD_UNREACHABLE:
976: r.r.rtm_type = RTN_UNREACHABLE;
977: break;
978: case RTD_PROHIBIT:
979: r.r.rtm_type = RTN_PROHIBIT;
980: break;
981: case RTD_MULTIPATH:
982: r.r.rtm_type = RTN_UNICAST;
983: nl_add_multipath(&r.h, sizeof(r), a->nexthops);
984: break;
985: case RTD_NONE: /* Passed by nl_delete_rte(): no type or next hop is encoded */
986: break;
987: default:
988: bug("krt_capable inconsistent with nl_send_route");
989: }
990:
991: /* Ignore missing for DELETE */
992: return nl_exchange(&r.h, (op == NL_OP_DELETE));
993: }
994:
995: static inline int
996: nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
997: {
998: rta *a = e->attrs;
999: int err = 0;
1000:
1001: if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH))
1002: {
1003: struct mpnh *nh = a->nexthops;
1004:
1005: err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface);
1006: if (err < 0)
1007: return err;
1008:
1009: for (nh = nh->next; nh; nh = nh->next)
1010: err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface);
1011:
1012: return err;
1013: }
1014:
1015: return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface);
1016: }
1017:
1018: static inline int
1019: nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1020: {
1021: int err = 0;
1022:
1023: /* For IPv6, we just repeatedly request DELETE until we get error */
1024: do
1025: err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL);
1026: while (krt_ecmp6(p) && !err);
1027:
1028: return err;
1029: }
1030:
1031: void
1032: krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
1033: {
1034: int err = 0;
1035:
1036: /*
1037: * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
1038: *
1039: * 1) Does not check for matching rtm_protocol
1040: * 2) Has broken semantics for IPv6 ECMP
1041: * 3) Crashes some kernel version when used for IPv6 ECMP
1042: *
1043: * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
1044: * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
1045: */
1046:
1047: if (old)
1048: nl_delete_rte(p, old, eattrs);
1049:
1050: if (new)
1051: err = nl_add_rte(p, new, eattrs);
1052:
1053: if (err < 0)
1054: n->n.flags |= KRF_SYNC_ERROR;
1055: else
1056: n->n.flags &= ~KRF_SYNC_ERROR;
1057: }
1058:
1059:
1060: static inline struct mpnh *
1061: nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
1062: {
1063: struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh));
1064:
1065: nh->gw = gw;
1066: nh->iface = iface;
1067: nh->next = NULL;
1068: nh->weight = weight;
1069:
1070: return nh;
1071: }
1072:
1073: static int
1074: nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
1075: {
1076: /* Route merging must be active */
1077: if (!s->merge)
1078: return 0;
1079:
1080: /* Saved and new route must have same network, proto/table, and priority */
1081: if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1082: return 0;
1083:
1084: /* Both must be regular unicast routes */
1085: if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1086: return 0;
1087:
1088: return 1;
1089: }
1090:
1091: static void
1092: nl_announce_route(struct nl_parse_state *s)
1093: {
1094: rte *e = rte_get_temp(s->attrs);
1095: e->net = s->net;
1096: e->u.krt.src = s->krt_src;
1097: e->u.krt.proto = s->krt_proto;
1098: e->u.krt.seen = 0;
1099: e->u.krt.best = 0;
1100: e->u.krt.metric = s->krt_metric;
1101:
1102: if (s->scan)
1103: krt_got_route(s->proto, e);
1104: else
1105: krt_got_route_async(s->proto, e, s->new);
1106:
1107: s->net = NULL;
1108: s->attrs = NULL;
1109: s->proto = NULL;
1110: lp_flush(s->pool);
1111: }
1112:
1113: static inline void
1114: nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
1115: {
1116: memset(s, 0, sizeof (struct nl_parse_state));
1117: s->pool = nl_linpool;
1118: s->scan = scan;
1119: s->merge = merge;
1120: }
1121:
1122: static inline void
1123: nl_parse_end(struct nl_parse_state *s)
1124: {
1125: if (s->net)
1126: nl_announce_route(s);
1127: }
1128:
1129:
1130: #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1131:
1132: static void
1133: nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
1134: {
1135: struct krt_proto *p;
1136: struct rtmsg *i;
1137: struct rtattr *a[BIRD_RTA_MAX];
1138: int new = h->nlmsg_type == RTM_NEWROUTE;
1139:
1140: ip_addr dst = IPA_NONE;
1141: u32 oif = ~0;
1142: u32 table;
1143: u32 priority = 0;
1144: u32 def_scope = RT_SCOPE_UNIVERSE;
1145: int src;
1146:
1147: if (!(i = nl_checkin(h, sizeof(*i))))
1148: return;
1149:
1150: switch (i->rtm_family)
1151: {
1152: #ifndef IPV6
1153: case AF_INET:
1154: if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1155: return;
1156: break;
1157: #else
1158: case AF_INET6:
1159: if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1160: return;
1161: break;
1162: #endif
1163: default:
1164: return;
1165: }
1166:
1167: if (a[RTA_DST])
1168: {
1169: memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
1170: ipa_ntoh(dst);
1171: }
1172:
1173: if (a[RTA_OIF])
1174: oif = rta_get_u32(a[RTA_OIF]);
1175:
1176: if (a[RTA_TABLE])
1177: table = rta_get_u32(a[RTA_TABLE]);
1178: else
1179: table = i->rtm_table;
1180:
1181: p = HASH_FIND(nl_table_map, RTH, table); /* Do we know this table? */
1182: DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, table, i->rtm_protocol, p ? p->p.name : "(none)");
1183: if (!p)
1184: SKIP("unknown table %d\n", table);
1185:
1186: #ifdef IPV6
1187: if (a[RTA_IIF])
1188: SKIP("IIF set\n");
1189: #else
1190: if (i->rtm_tos != 0) /* We don't support TOS */
1191: SKIP("TOS %02x\n", i->rtm_tos);
1192: #endif
1193:
1194: if (s->scan && !new)
1195: SKIP("RTM_DELROUTE in scan\n");
1196:
1197: if (a[RTA_PRIORITY])
1198: priority = rta_get_u32(a[RTA_PRIORITY]);
1199:
1200: int c = ipa_classify_net(dst);
1201: if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1202: SKIP("strange class/scope\n");
1203:
1204: switch (i->rtm_protocol)
1205: {
1206: case RTPROT_UNSPEC:
1207: SKIP("proto unspec\n");
1208:
1209: case RTPROT_REDIRECT:
1210: src = KRT_SRC_REDIRECT;
1211: break;
1212:
1213: case RTPROT_KERNEL:
1214: src = KRT_SRC_KERNEL;
1215: return;
1216:
1217: case RTPROT_BIRD:
1218: if (!s->scan)
1219: SKIP("echo\n");
1220: src = KRT_SRC_BIRD;
1221: break;
1222:
1223: case RTPROT_BOOT:
1224: default:
1225: src = KRT_SRC_ALIEN;
1226: }
1227:
1228: net *net = net_get(p->p.table, dst, i->rtm_dst_len);
1229:
1230: if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
1231: nl_announce_route(s);
1232:
1233: rta *ra = lp_allocz(s->pool, sizeof(rta));
1234: ra->src = p->p.main_source;
1235: ra->source = RTS_INHERIT;
1236: ra->scope = SCOPE_UNIVERSE;
1237: ra->cast = RTC_UNICAST;
1238:
1239: switch (i->rtm_type)
1240: {
1241: case RTN_UNICAST:
1242:
1243: if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
1244: {
1245: ra->dest = RTD_MULTIPATH;
1246: ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
1247: if (!ra->nexthops)
1248: {
1249: log(L_ERR "KRT: Received strange multipath route %I/%d",
1250: net->n.prefix, net->n.pxlen);
1251: return;
1252: }
1253:
1254: break;
1255: }
1256:
1257: ra->iface = if_find_by_index(oif);
1258: if (!ra->iface)
1259: {
1260: log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
1261: net->n.prefix, net->n.pxlen, oif);
1262: return;
1263: }
1264:
1265: if (a[RTA_GATEWAY])
1266: {
1267: neighbor *ng;
1268: ra->dest = RTD_ROUTER;
1269: memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));
1270: ipa_ntoh(ra->gw);
1271:
1272: #ifdef IPV6
1273: /* Silently skip strange 6to4 routes */
1274: if (ipa_in_net(ra->gw, IPA_NONE, 96))
1275: return;
1276: #endif
1277:
1278: ng = neigh_find2(&p->p, &ra->gw, ra->iface,
1279: (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
1280: if (!ng || (ng->scope == SCOPE_HOST))
1281: {
1282: log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
1283: net->n.prefix, net->n.pxlen, ra->gw);
1284: return;
1285: }
1286: }
1287: else
1288: {
1289: ra->dest = RTD_DEVICE;
1290: def_scope = RT_SCOPE_LINK;
1291: }
1292:
1293: break;
1294: case RTN_BLACKHOLE:
1295: ra->dest = RTD_BLACKHOLE;
1296: break;
1297: case RTN_UNREACHABLE:
1298: ra->dest = RTD_UNREACHABLE;
1299: break;
1300: case RTN_PROHIBIT:
1301: ra->dest = RTD_PROHIBIT;
1302: break;
1303: /* FIXME: What about RTN_THROW? */
1304: default:
1305: SKIP("type %d\n", i->rtm_type);
1306: return;
1307: }
1308:
1309: if (i->rtm_scope != def_scope)
1310: {
1311: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1312: ea->next = ra->eattrs;
1313: ra->eattrs = ea;
1314: ea->flags = EALF_SORTED;
1315: ea->count = 1;
1316: ea->attrs[0].id = EA_KRT_SCOPE;
1317: ea->attrs[0].flags = 0;
1318: ea->attrs[0].type = EAF_TYPE_INT;
1319: ea->attrs[0].u.data = i->rtm_scope;
1320: }
1321:
1322: if (a[RTA_PREFSRC])
1323: {
1324: ip_addr ps;
1325: memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
1326: ipa_ntoh(ps);
1327:
1328: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1329: ea->next = ra->eattrs;
1330: ra->eattrs = ea;
1331: ea->flags = EALF_SORTED;
1332: ea->count = 1;
1333: ea->attrs[0].id = EA_KRT_PREFSRC;
1334: ea->attrs[0].flags = 0;
1335: ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
1336: ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1337: ea->attrs[0].u.ptr->length = sizeof(ps);
1338: memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
1339: }
1340:
1341: if (a[RTA_FLOW])
1342: {
1343: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1344: ea->next = ra->eattrs;
1345: ra->eattrs = ea;
1346: ea->flags = EALF_SORTED;
1347: ea->count = 1;
1348: ea->attrs[0].id = EA_KRT_REALM;
1349: ea->attrs[0].flags = 0;
1350: ea->attrs[0].type = EAF_TYPE_INT;
1351: ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
1352: }
1353:
1354: if (a[RTA_METRICS])
1355: {
1356: u32 metrics[KRT_METRICS_MAX];
1357: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
1358: int t, n = 0;
1359:
1360: if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1361: {
1362: log(L_ERR "KRT: Received route %I/%d with strange RTA_METRICS attribute",
1363: net->n.prefix, net->n.pxlen);
1364: return;
1365: }
1366:
1367: for (t = 1; t < KRT_METRICS_MAX; t++)
1368: if (metrics[0] & (1 << t))
1369: {
1370: ea->attrs[n].id = EA_CODE(EAP_KRT, KRT_METRICS_OFFSET + t);
1371: ea->attrs[n].flags = 0;
1372: ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1373: ea->attrs[n].u.data = metrics[t];
1374: n++;
1375: }
1376:
1377: if (n > 0)
1378: {
1379: ea->next = ra->eattrs;
1380: ea->flags = EALF_SORTED;
1381: ea->count = n;
1382: ra->eattrs = ea;
1383: }
1384: }
1385:
1386: /*
1387: * Ideally, now we would send the received route to the rest of kernel code.
1388: * But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it
1389: * and merge next hops until the end of the sequence.
1390: */
1391:
1392: if (!s->net)
1393: {
1394: /* Store the new route */
1395: s->net = net;
1396: s->attrs = ra;
1397: s->proto = p;
1398: s->new = new;
1399: s->krt_src = src;
1400: s->krt_type = i->rtm_type;
1401: s->krt_proto = i->rtm_protocol;
1402: s->krt_metric = priority;
1403: }
1404: else
1405: {
1406: /* Merge next hops with the stored route */
1407: rta *a = s->attrs;
1408:
1409: if (a->dest != RTD_MULTIPATH)
1410: {
1411: a->dest = RTD_MULTIPATH;
1412: a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0);
1413: }
1414:
1415: mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0));
1416: }
1417: }
1418:
1419: void
1420: krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1421: {
1422: struct nlmsghdr *h;
1423: struct nl_parse_state s;
1424:
1425: nl_parse_begin(&s, 1, krt_ecmp6(p));
1426:
1427: nl_request_dump(BIRD_AF, RTM_GETROUTE);
1428: while (h = nl_get_scan())
1429: if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1430: nl_parse_route(&s, h);
1431: else
1432: log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1433:
1434: nl_parse_end(&s);
1435: }
1436:
1437: /*
1438: * Asynchronous Netlink interface
1439: */
1440:
1441: static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1442: static byte *nl_async_rx_buffer; /* Receive buffer */
1443:
1444: static void
1445: nl_async_msg(struct nlmsghdr *h)
1446: {
1447: struct nl_parse_state s;
1448:
1449: switch (h->nlmsg_type)
1450: {
1451: case RTM_NEWROUTE:
1452: case RTM_DELROUTE:
1453: DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1454: nl_parse_begin(&s, 0, 0);
1455: nl_parse_route(&s, h);
1456: nl_parse_end(&s);
1457: break;
1458: case RTM_NEWLINK:
1459: case RTM_DELLINK:
1460: DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1461: if (kif_proto)
1462: nl_parse_link(h, 0);
1463: break;
1464: case RTM_NEWADDR:
1465: case RTM_DELADDR:
1466: DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1467: if (kif_proto)
1468: nl_parse_addr(h, 0);
1469: break;
1470: default:
1471: DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1472: }
1473: }
1474:
1475: static int
1476: nl_async_hook(sock *sk, uint size UNUSED)
1477: {
1478: struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1479: struct sockaddr_nl sa;
1480: struct msghdr m = {
1481: .msg_name = &sa,
1482: .msg_namelen = sizeof(sa),
1483: .msg_iov = &iov,
1484: .msg_iovlen = 1,
1485: };
1486: struct nlmsghdr *h;
1487: int x;
1488: uint len;
1489:
1490: x = recvmsg(sk->fd, &m, 0);
1491: if (x < 0)
1492: {
1493: if (errno == ENOBUFS)
1494: {
1495: /*
1496: * Netlink reports some packets have been thrown away.
1497: * One day we might react to it by asking for route table
1498: * scan in near future.
1499: */
1500: log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
1501: return 1; /* More data are likely to be ready */
1502: }
1503: else if (errno != EWOULDBLOCK)
1504: log(L_ERR "Netlink recvmsg: %m");
1505: return 0;
1506: }
1507: if (sa.nl_pid) /* It isn't from the kernel */
1508: {
1509: DBG("Non-kernel packet\n");
1510: return 1;
1511: }
1512: h = (void *) nl_async_rx_buffer;
1513: len = x;
1514: if (m.msg_flags & MSG_TRUNC)
1515: {
1516: log(L_WARN "Netlink got truncated asynchronous message");
1517: return 1;
1518: }
1519: while (NLMSG_OK(h, len))
1520: {
1521: nl_async_msg(h);
1522: h = NLMSG_NEXT(h, len);
1523: }
1524: if (len)
1525: log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1526: return 1;
1527: }
1528:
1529: static void
1530: nl_async_err_hook(sock *sk, int e UNUSED)
1531: {
1532: nl_async_hook(sk, 0);
1533: }
1534:
1535: static void
1536: nl_open_async(void)
1537: {
1538: sock *sk;
1539: struct sockaddr_nl sa;
1540: int fd;
1541:
1542: if (nl_async_sk)
1543: return;
1544:
1545: DBG("KRT: Opening async netlink socket\n");
1546:
1547: fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1548: if (fd < 0)
1549: {
1550: log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1551: return;
1552: }
1553:
1554: bzero(&sa, sizeof(sa));
1555: sa.nl_family = AF_NETLINK;
1556: #ifdef IPV6
1557: sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1558: #else
1559: sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
1560: #endif
1561: if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1562: {
1563: log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
1564: close(fd);
1565: return;
1566: }
1567:
1568: nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1569:
1570: sk = nl_async_sk = sk_new(krt_pool);
1571: sk->type = SK_MAGIC;
1572: sk->rx_hook = nl_async_hook;
1573: sk->err_hook = nl_async_err_hook;
1574: sk->fd = fd;
1575: if (sk_open(sk) < 0)
1576: bug("Netlink: sk_open failed");
1577: }
1578:
1579:
1580: /*
1581: * Interface to the UNIX krt module
1582: */
1583:
1584: void
1585: krt_sys_io_init(void)
1586: {
1587: nl_linpool = lp_new(krt_pool, 4080);
1588: HASH_INIT(nl_table_map, krt_pool, 6);
1589: }
1590:
1591: int
1592: krt_sys_start(struct krt_proto *p)
1593: {
1594: struct krt_proto *old = HASH_FIND(nl_table_map, RTH, krt_table_id(p));
1595:
1596: if (old)
1597: {
1598: log(L_ERR "%s: Kernel table %u already registered by %s",
1599: p->p.name, krt_table_id(p), old->p.name);
1600: return 0;
1601: }
1602:
1603: HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
1604:
1605: nl_open();
1606: nl_open_async();
1607:
1608: return 1;
1609: }
1610:
1611: void
1612: krt_sys_shutdown(struct krt_proto *p)
1613: {
1614: HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
1615: }
1616:
1617: int
1618: krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
1619: {
1620: return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
1621: }
1622:
1623: void
1624: krt_sys_init_config(struct krt_config *cf)
1625: {
1626: cf->sys.table_id = RT_TABLE_MAIN;
1627: cf->sys.metric = 0;
1628: }
1629:
1630: void
1631: krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
1632: {
1633: d->sys.table_id = s->sys.table_id;
1634: d->sys.metric = s->sys.metric;
1635: }
1636:
1637: static const char *krt_metrics_names[KRT_METRICS_MAX] = {
1638: NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
1639: "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
1640: };
1641:
1642: static const char *krt_features_names[KRT_FEATURES_MAX] = {
1643: "ecn", NULL, NULL, "allfrag"
1644: };
1645:
1646: int
1647: krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
1648: {
1649: switch (a->id)
1650: {
1651: case EA_KRT_PREFSRC:
1652: bsprintf(buf, "prefsrc");
1653: return GA_NAME;
1654:
1655: case EA_KRT_REALM:
1656: bsprintf(buf, "realm");
1657: return GA_NAME;
1658:
1659: case EA_KRT_SCOPE:
1660: bsprintf(buf, "scope");
1661: return GA_NAME;
1662:
1663: case EA_KRT_LOCK:
1664: buf += bsprintf(buf, "lock:");
1665: ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
1666: return GA_FULL;
1667:
1668: case EA_KRT_FEATURES:
1669: buf += bsprintf(buf, "features:");
1670: ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
1671: return GA_FULL;
1672:
1673: default:;
1674: int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
1675: if (id > 0 && id < KRT_METRICS_MAX)
1676: {
1677: bsprintf(buf, "%s", krt_metrics_names[id]);
1678: return GA_NAME;
1679: }
1680:
1681: return GA_UNKNOWN;
1682: }
1683: }
1684:
1685:
1686:
1687: void
1688: kif_sys_start(struct kif_proto *p UNUSED)
1689: {
1690: nl_open();
1691: nl_open_async();
1692: }
1693:
1694: void
1695: kif_sys_shutdown(struct kif_proto *p UNUSED)
1696: {
1697: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>