Annotation of embedaddon/bird/sysdep/linux/netlink.c, revision 1.1.1.2
1.1 misho 1: /*
2: * BIRD -- Linux Netlink Interface
3: *
4: * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5: *
6: * Can be freely distributed and used under the terms of the GNU GPL.
7: */
8:
9: #include <stdio.h>
10: #include <unistd.h>
11: #include <fcntl.h>
12: #include <sys/socket.h>
13: #include <sys/uio.h>
14: #include <errno.h>
15:
16: #undef LOCAL_DEBUG
17:
18: #include "nest/bird.h"
19: #include "nest/route.h"
20: #include "nest/protocol.h"
21: #include "nest/iface.h"
22: #include "lib/timer.h"
23: #include "lib/unix.h"
24: #include "lib/krt.h"
25: #include "lib/socket.h"
26: #include "lib/string.h"
27: #include "lib/hash.h"
28: #include "conf/conf.h"
29:
30: #include <asm/types.h>
31: #include <linux/if.h>
32: #include <linux/netlink.h>
33: #include <linux/rtnetlink.h>
34:
35:
/* Fallback values for constants that older system headers may not provide */

#if !defined(MSG_TRUNC)		/* Hack: Several versions of glibc miss this one :( */
# define MSG_TRUNC 0x20
#endif

#if !defined(IFA_FLAGS)
# define IFA_FLAGS 8
#endif

#if !defined(IFF_LOWER_UP)
# define IFF_LOWER_UP 0x10000
#endif

#if !defined(RTA_TABLE)
# define RTA_TABLE 15
#endif


/* krt_ecmp6() evaluates to 1 in IPv6 builds and to 0 otherwise */
#if defined(IPV6)
# define krt_ecmp6(X) 1
#else
# define krt_ecmp6(X) 0
#endif
58:
59: /*
60: * Structure nl_parse_state keeps state of received route processing. Ideally,
61: * we could just independently parse received Netlink messages and immediately
1.1.1.2 ! misho 62: * propagate received routes to the rest of BIRD, but older Linux kernel (before
! 63: * version 4.11) represents and announces IPv6 ECMP routes not as one route with
! 64: * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
! 65: * routes with the same prefix. More recent kernels work as with IPv4.
1.1 misho 66: *
67: * Therefore, BIRD keeps currently processed route in nl_parse_state structure
68: * and postpones its propagation until we expect it to be final; i.e., when
69: * non-matching route is received or when the scan ends. When another matching
70: * route is received, it is merged with the already processed route to form an
71: * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
1.1.1.2 ! misho 72: * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
! 73: * routes with RTA_MULTIPATH set are just considered non-matching.
1.1 misho 74: *
75: * This is ignored for asynchronous notifications (every notification is handled
76: * as a separate route). It is not an issue for our routes, as we ignore such
77: * notifications anyways. But importing alien IPv6 ECMP routes does not work
1.1.1.2 ! misho 78: * properly with older kernels.
! 79: *
 * Whatever the kernel version is, BIRD itself sends IPv6 ECMP routes to the
 * kernel as multiple routes for the same prefix.
1.1 misho 82: */
83:
84: struct nl_parse_state
85: {
86: struct linpool *pool;
87: int scan;
88: int merge;
89:
90: net *net;
91: rta *attrs;
92: struct krt_proto *proto;
93: s8 new;
94: s8 krt_src;
95: u8 krt_type;
96: u8 krt_proto;
97: u32 krt_metric;
98: };
99:
100: /*
101: * Synchronous Netlink interface
102: */
103:
104: struct nl_sock
105: {
106: int fd;
107: u32 seq;
108: byte *rx_buffer; /* Receive buffer */
109: struct nlmsghdr *last_hdr; /* Recently received packet */
110: uint last_size;
111: };
112:
113: #define NL_RX_SIZE 8192
114:
115: #define NL_OP_DELETE 0
116: #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
117: #define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
118: #define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
119:
120: static linpool *nl_linpool;
121:
122: static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
123: static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
124:
125: static void
126: nl_open_sock(struct nl_sock *nl)
127: {
128: if (nl->fd < 0)
129: {
130: nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
131: if (nl->fd < 0)
132: die("Unable to open rtnetlink socket: %m");
133: nl->seq = now;
134: nl->rx_buffer = xmalloc(NL_RX_SIZE);
135: nl->last_hdr = NULL;
136: nl->last_size = 0;
137: }
138: }
139:
140: static void
141: nl_open(void)
142: {
143: nl_open_sock(&nl_scan);
144: nl_open_sock(&nl_req);
145: }
146:
147: static void
148: nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
149: {
150: struct sockaddr_nl sa;
151:
152: memset(&sa, 0, sizeof(sa));
153: sa.nl_family = AF_NETLINK;
154: nh->nlmsg_pid = 0;
155: nh->nlmsg_seq = ++(nl->seq);
156: if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
157: die("rtnetlink sendto: %m");
158: nl->last_hdr = NULL;
159: }
160:
161: static void
162: nl_request_dump(int af, int cmd)
163: {
164: struct {
165: struct nlmsghdr nh;
166: struct rtgenmsg g;
167: } req = {
168: .nh.nlmsg_type = cmd,
169: .nh.nlmsg_len = sizeof(req),
170: .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
171: .g.rtgen_family = af
172: };
173: nl_send(&nl_scan, &req.nh);
174: }
175:
176: static struct nlmsghdr *
177: nl_get_reply(struct nl_sock *nl)
178: {
179: for(;;)
180: {
181: if (!nl->last_hdr)
182: {
183: struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
184: struct sockaddr_nl sa;
185: struct msghdr m = {
186: .msg_name = &sa,
187: .msg_namelen = sizeof(sa),
188: .msg_iov = &iov,
189: .msg_iovlen = 1,
190: };
191: int x = recvmsg(nl->fd, &m, 0);
192: if (x < 0)
193: die("nl_get_reply: %m");
194: if (sa.nl_pid) /* It isn't from the kernel */
195: {
196: DBG("Non-kernel packet\n");
197: continue;
198: }
199: nl->last_size = x;
200: nl->last_hdr = (void *) nl->rx_buffer;
201: if (m.msg_flags & MSG_TRUNC)
202: bug("nl_get_reply: got truncated reply which should be impossible");
203: }
204: if (NLMSG_OK(nl->last_hdr, nl->last_size))
205: {
206: struct nlmsghdr *h = nl->last_hdr;
207: nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
208: if (h->nlmsg_seq != nl->seq)
209: {
210: log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
211: h->nlmsg_seq, nl->seq);
212: continue;
213: }
214: return h;
215: }
216: if (nl->last_size)
217: log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
218: nl->last_hdr = NULL;
219: }
220: }
221:
222: static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
223:
224: static int
225: nl_error(struct nlmsghdr *h, int ignore_esrch)
226: {
227: struct nlmsgerr *e;
228: int ec;
229:
230: if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
231: {
232: log(L_WARN "Netlink: Truncated error message received");
233: return ENOBUFS;
234: }
235: e = (struct nlmsgerr *) NLMSG_DATA(h);
236: ec = -e->error;
237: if (ec && !(ignore_esrch && (ec == ESRCH)))
238: log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
239: return ec;
240: }
241:
242: static struct nlmsghdr *
243: nl_get_scan(void)
244: {
245: struct nlmsghdr *h = nl_get_reply(&nl_scan);
246:
247: if (h->nlmsg_type == NLMSG_DONE)
248: return NULL;
249: if (h->nlmsg_type == NLMSG_ERROR)
250: {
251: nl_error(h, 0);
252: return NULL;
253: }
254: return h;
255: }
256:
257: static int
258: nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
259: {
260: struct nlmsghdr *h;
261:
262: nl_send(&nl_req, pkt);
263: for(;;)
264: {
265: h = nl_get_reply(&nl_req);
266: if (h->nlmsg_type == NLMSG_ERROR)
267: break;
268: log(L_WARN "nl_exchange: Unexpected reply received");
269: }
270: return nl_error(h, ignore_esrch) ? -1 : 0;
271: }
272:
273: /*
274: * Netlink attributes
275: */
276:
277: static int nl_attr_len;
278:
279: static void *
280: nl_checkin(struct nlmsghdr *h, int lsize)
281: {
282: nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
283: if (nl_attr_len < 0)
284: {
285: log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
286: return NULL;
287: }
288: return NLMSG_DATA(h);
289: }
290:
291: struct nl_want_attrs {
292: u8 defined:1;
293: u8 checksize:1;
294: u8 size;
295: };
296:
297:
298: #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
299:
300: static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
301: [IFLA_IFNAME] = { 1, 0, 0 },
302: [IFLA_MTU] = { 1, 1, sizeof(u32) },
1.1.1.2 ! misho 303: [IFLA_MASTER] = { 1, 1, sizeof(u32) },
1.1 misho 304: [IFLA_WIRELESS] = { 1, 0, 0 },
305: };
306:
307:
308: #define BIRD_IFA_MAX (IFA_FLAGS+1)
309:
310: #ifndef IPV6
311: static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
312: [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
313: [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
314: [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
315: [IFA_FLAGS] = { 1, 1, sizeof(u32) },
316: };
317: #else
318: static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
319: [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
320: [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
321: [IFA_FLAGS] = { 1, 1, sizeof(u32) },
322: };
323: #endif
324:
325:
326: #define BIRD_RTA_MAX (RTA_TABLE+1)
327:
1.1.1.2 ! misho 328: #ifndef IPV6
1.1 misho 329: static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
330: [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
331: };
1.1.1.2 ! misho 332: #else
! 333: static struct nl_want_attrs mpnh_attr_want6[BIRD_RTA_MAX] = {
! 334: [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
! 335: };
! 336: #endif
1.1 misho 337:
338: #ifndef IPV6
339: static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
340: [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
341: [RTA_OIF] = { 1, 1, sizeof(u32) },
342: [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
343: [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
344: [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
345: [RTA_METRICS] = { 1, 0, 0 },
346: [RTA_MULTIPATH] = { 1, 0, 0 },
347: [RTA_FLOW] = { 1, 1, sizeof(u32) },
348: [RTA_TABLE] = { 1, 1, sizeof(u32) },
349: };
350: #else
351: static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
352: [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
353: [RTA_IIF] = { 1, 1, sizeof(u32) },
354: [RTA_OIF] = { 1, 1, sizeof(u32) },
355: [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
356: [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
357: [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
358: [RTA_METRICS] = { 1, 0, 0 },
1.1.1.2 ! misho 359: [RTA_MULTIPATH] = { 1, 0, 0 },
1.1 misho 360: [RTA_FLOW] = { 1, 1, sizeof(u32) },
361: [RTA_TABLE] = { 1, 1, sizeof(u32) },
362: };
363: #endif
364:
365:
366: static int
367: nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
368: {
369: int max = ksize / sizeof(struct rtattr *);
370: bzero(k, ksize);
371:
372: for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
373: {
374: if ((a->rta_type >= max) || !want[a->rta_type].defined)
375: continue;
376:
377: if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
378: {
379: log(L_ERR "nl_parse_attrs: Malformed message received");
380: return 0;
381: }
382:
383: k[a->rta_type] = a;
384: }
385:
386: if (nl_attr_len)
387: {
388: log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
389: return 0;
390: }
391:
392: return 1;
393: }
394:
395: static inline u32 rta_get_u32(struct rtattr *a)
396: { return *(u32 *) RTA_DATA(a); }
397:
398: static inline ip4_addr rta_get_ip4(struct rtattr *a)
399: { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
400:
401: static inline ip6_addr rta_get_ip6(struct rtattr *a)
402: { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
403:
404:
405: struct rtattr *
406: nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
407: {
408: uint pos = NLMSG_ALIGN(h->nlmsg_len);
409: uint len = RTA_LENGTH(dlen);
410:
411: if (pos + len > bufsize)
412: bug("nl_add_attr: packet buffer overflow");
413:
414: struct rtattr *a = (struct rtattr *)((char *)h + pos);
415: a->rta_type = code;
416: a->rta_len = len;
417: h->nlmsg_len = pos + len;
418:
419: if (dlen > 0)
420: memcpy(RTA_DATA(a), data, dlen);
421:
422: return a;
423: }
424:
425: static inline void
426: nl_add_attr_u32(struct nlmsghdr *h, unsigned bufsize, int code, u32 data)
427: {
428: nl_add_attr(h, bufsize, code, &data, 4);
429: }
430:
431: static inline void
432: nl_add_attr_ipa(struct nlmsghdr *h, unsigned bufsize, int code, ip_addr ipa)
433: {
434: ipa_hton(ipa);
435: nl_add_attr(h, bufsize, code, &ipa, sizeof(ipa));
436: }
437:
438: static inline struct rtattr *
439: nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
440: {
441: return nl_add_attr(h, bufsize, code, NULL, 0);
442: }
443:
/* Finish nested attribute @a: it spans everything up to the aligned end of @h */
static inline void
nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);

  a->rta_len = msg_end - (char *) a;
}
449:
450: static inline struct rtnexthop *
451: nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
452: {
453: uint pos = NLMSG_ALIGN(h->nlmsg_len);
454: uint len = RTNH_LENGTH(0);
455:
456: if (pos + len > bufsize)
457: bug("nl_open_nexthop: packet buffer overflow");
458:
459: h->nlmsg_len = pos + len;
460:
461: return (void *)h + pos;
462: }
463:
/* Finish next hop @nh: it spans everything up to the aligned end of @h */
static inline void
nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
{
  char *msg_end = (char *) h + NLMSG_ALIGN(h->nlmsg_len);

  nh->rtnh_len = msg_end - (char *) nh;
}
469:
470: static void
471: nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
472: {
473: struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
474:
475: for (; nh; nh = nh->next)
476: {
477: struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
478:
479: rtnh->rtnh_flags = 0;
480: rtnh->rtnh_hops = nh->weight;
481: rtnh->rtnh_ifindex = nh->iface->index;
482:
483: nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
484:
485: nl_close_nexthop(h, rtnh);
486: }
487:
488: nl_close_attr(h, a);
489: }
490:
491: static struct mpnh *
1.1.1.2 ! misho 492: nl_parse_multipath(struct krt_proto *p, struct rtattr *ra, int af)
1.1 misho 493: {
494: /* Temporary buffer for multicast nexthops */
495: static struct mpnh *nh_buffer;
496: static int nh_buf_size; /* in number of structures */
497: static int nh_buf_used;
498:
499: struct rtattr *a[BIRD_RTA_MAX];
500: struct rtnexthop *nh = RTA_DATA(ra);
501: struct mpnh *rv, *first, **last;
502: unsigned len = RTA_PAYLOAD(ra);
503:
504: first = NULL;
505: last = &first;
506: nh_buf_used = 0;
507:
508: while (len)
509: {
510: /* Use RTNH_OK(nh,len) ?? */
511: if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
512: return NULL;
513:
514: if (nh_buf_used == nh_buf_size)
515: {
516: nh_buf_size = nh_buf_size ? (nh_buf_size * 2) : 4;
517: nh_buffer = xrealloc(nh_buffer, nh_buf_size * sizeof(struct mpnh));
518: }
519: *last = rv = nh_buffer + nh_buf_used++;
520: rv->next = NULL;
521: last = &(rv->next);
522:
523: rv->weight = nh->rtnh_hops;
524: rv->iface = if_find_by_index(nh->rtnh_ifindex);
525: if (!rv->iface)
526: return NULL;
527:
528: /* Nonexistent RTNH_PAYLOAD ?? */
529: nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
1.1.1.2 ! misho 530: switch (af)
! 531: {
! 532: #ifndef IPV6
! 533: case AF_INET:
! 534: if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want4, a, sizeof(a)))
! 535: return NULL;
! 536: break;
! 537: #else
! 538: case AF_INET6:
! 539: if (!nl_parse_attrs(RTNH_DATA(nh), mpnh_attr_want6, a, sizeof(a)))
! 540: return NULL;
! 541: break;
! 542: #endif
! 543: default:
! 544: return NULL;
! 545: }
! 546:
1.1 misho 547: if (a[RTA_GATEWAY])
548: {
1.1.1.2 ! misho 549: memcpy(&rv->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(rv->gw));
1.1 misho 550: ipa_ntoh(rv->gw);
551:
552: neighbor *ng = neigh_find2(&p->p, &rv->gw, rv->iface,
553: (nh->rtnh_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
554: if (!ng || (ng->scope == SCOPE_HOST))
555: return NULL;
556: }
557: else
558: return NULL;
559:
560: len -= NLMSG_ALIGN(nh->rtnh_len);
561: nh = RTNH_NEXT(nh);
562: }
563:
564: return first;
565: }
566:
567: static void
568: nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
569: {
570: struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
571: int t;
572:
573: for (t = 1; t < max; t++)
574: if (metrics[0] & (1 << t))
575: nl_add_attr_u32(h, bufsize, t, metrics[t]);
576:
577: nl_close_attr(h, a);
578: }
579:
580: static int
581: nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
582: {
583: struct rtattr *a = RTA_DATA(hdr);
584: int len = RTA_PAYLOAD(hdr);
585:
586: metrics[0] = 0;
587: for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
588: {
589: if (a->rta_type == RTA_UNSPEC)
590: continue;
591:
592: if (a->rta_type >= max)
593: continue;
594:
595: if (RTA_PAYLOAD(a) != 4)
596: return -1;
597:
598: metrics[0] |= 1 << a->rta_type;
599: metrics[a->rta_type] = rta_get_u32(a);
600: }
601:
602: if (len > 0)
603: return -1;
604:
605: return 0;
606: }
607:
608:
609: /*
610: * Scanning of interfaces
611: */
612:
613: static void
614: nl_parse_link(struct nlmsghdr *h, int scan)
615: {
616: struct ifinfomsg *i;
617: struct rtattr *a[BIRD_IFLA_MAX];
618: int new = h->nlmsg_type == RTM_NEWLINK;
619: struct iface f = {};
620: struct iface *ifi;
621: char *name;
1.1.1.2 ! misho 622: u32 mtu, master = 0;
1.1 misho 623: uint fl;
624:
625: if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
626: return;
627: if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
628: {
629: /*
630: * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
631: * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
632: * We simply ignore all such messages with IFLA_WIRELESS without notice.
633: */
634:
635: if (a[IFLA_WIRELESS])
636: return;
637:
638: log(L_ERR "KIF: Malformed message received");
639: return;
640: }
641:
642: name = RTA_DATA(a[IFLA_IFNAME]);
643: mtu = rta_get_u32(a[IFLA_MTU]);
644:
1.1.1.2 ! misho 645: if (a[IFLA_MASTER])
! 646: master = rta_get_u32(a[IFLA_MASTER]);
! 647:
1.1 misho 648: ifi = if_find_by_index(i->ifi_index);
649: if (!new)
650: {
651: DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
652: if (!ifi)
653: return;
654:
655: if_delete(ifi);
656: }
657: else
658: {
659: DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
660: if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
661: if_delete(ifi);
662:
663: strncpy(f.name, name, sizeof(f.name)-1);
664: f.index = i->ifi_index;
665: f.mtu = mtu;
666:
1.1.1.2 ! misho 667: f.master_index = master;
! 668: f.master = if_find_by_index(master);
! 669:
1.1 misho 670: fl = i->ifi_flags;
671: if (fl & IFF_UP)
672: f.flags |= IF_ADMIN_UP;
673: if (fl & IFF_LOWER_UP)
674: f.flags |= IF_LINK_UP;
675: if (fl & IFF_LOOPBACK) /* Loopback */
676: f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
677: else if (fl & IFF_POINTOPOINT) /* PtP */
678: f.flags |= IF_MULTICAST;
679: else if (fl & IFF_BROADCAST) /* Broadcast */
680: f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
681: else
682: f.flags |= IF_MULTIACCESS; /* NBMA */
683:
684: if (fl & IFF_MULTICAST)
685: f.flags |= IF_MULTICAST;
686:
687: ifi = if_update(&f);
688:
689: if (!scan)
690: if_end_partial_update(ifi);
691: }
692: }
693:
694: static void
695: nl_parse_addr(struct nlmsghdr *h, int scan)
696: {
697: struct ifaddrmsg *i;
698: struct rtattr *a[BIRD_IFA_MAX];
699: int new = h->nlmsg_type == RTM_NEWADDR;
700: struct ifa ifa;
701: struct iface *ifi;
702: int scope;
703: u32 ifa_flags;
704:
705: if (!(i = nl_checkin(h, sizeof(*i))))
706: return;
707:
708: switch (i->ifa_family)
709: {
710: #ifndef IPV6
711: case AF_INET:
712: if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
713: return;
714: if (!a[IFA_LOCAL])
715: {
716: log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
717: return;
718: }
719: break;
720: #else
721: case AF_INET6:
722: if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
723: return;
724: break;
725: #endif
726: default:
727: return;
728: }
729:
730: if (!a[IFA_ADDRESS])
731: {
732: log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
733: return;
734: }
735:
736: if (a[IFA_FLAGS])
737: ifa_flags = rta_get_u32(a[IFA_FLAGS]);
738: else
739: ifa_flags = i->ifa_flags;
740:
741: ifi = if_find_by_index(i->ifa_index);
742: if (!ifi)
743: {
744: log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
745: return;
746: }
747:
748: bzero(&ifa, sizeof(ifa));
749: ifa.iface = ifi;
750: if (ifa_flags & IFA_F_SECONDARY)
751: ifa.flags |= IA_SECONDARY;
752:
753: #ifdef IPV6
754: /* Ignore tentative addresses silently */
755: if (ifa_flags & IFA_F_TENTATIVE)
756: return;
757: #endif
758:
759: /* IFA_LOCAL can be unset for IPv6 interfaces */
760: memcpy(&ifa.ip, RTA_DATA(a[IFA_LOCAL] ? : a[IFA_ADDRESS]), sizeof(ifa.ip));
761: ipa_ntoh(ifa.ip);
762: ifa.pxlen = i->ifa_prefixlen;
763: if (i->ifa_prefixlen > BITS_PER_IP_ADDRESS)
764: {
765: log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
766: new = 0;
767: }
768: if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS)
769: {
770: ip_addr addr;
771: memcpy(&addr, RTA_DATA(a[IFA_ADDRESS]), sizeof(addr));
772: ipa_ntoh(addr);
773: ifa.prefix = ifa.brd = addr;
774:
775: /* It is either a host address or a peer address */
776: if (ipa_equal(ifa.ip, addr))
777: ifa.flags |= IA_HOST;
778: else
779: {
780: ifa.flags |= IA_PEER;
781: ifa.opposite = addr;
782: }
783: }
784: else
785: {
786: ip_addr netmask = ipa_mkmask(ifa.pxlen);
787: ifa.prefix = ipa_and(ifa.ip, netmask);
788: ifa.brd = ipa_or(ifa.ip, ipa_not(netmask));
789: if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 1)
790: ifa.opposite = ipa_opposite_m1(ifa.ip);
791:
792: #ifndef IPV6
793: if (i->ifa_prefixlen == BITS_PER_IP_ADDRESS - 2)
794: ifa.opposite = ipa_opposite_m2(ifa.ip);
795:
796: if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
797: {
798: ip_addr xbrd;
799: memcpy(&xbrd, RTA_DATA(a[IFA_BROADCAST]), sizeof(xbrd));
800: ipa_ntoh(xbrd);
801: if (ipa_equal(xbrd, ifa.prefix) || ipa_equal(xbrd, ifa.brd))
802: ifa.brd = xbrd;
803: else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
804: log(L_ERR "KIF: Invalid broadcast address %I for %s", xbrd, ifi->name);
805: }
806: #endif
807: }
808:
809: scope = ipa_classify(ifa.ip);
810: if (scope < 0)
811: {
812: log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
813: return;
814: }
815: ifa.scope = scope & IADDR_SCOPE_MASK;
816:
817: DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %I/%d, brd %I, opp %I\n",
818: ifi->index, ifi->name,
819: new ? "added" : "removed",
820: ifa.ip, ifa.flags, ifa.prefix, ifa.pxlen, ifa.brd, ifa.opposite);
821:
822: if (new)
823: ifa_update(&ifa);
824: else
825: ifa_delete(&ifa);
826:
827: if (!scan)
828: if_end_partial_update(ifi);
829: }
830:
831: void
832: kif_do_scan(struct kif_proto *p UNUSED)
833: {
834: struct nlmsghdr *h;
835:
836: if_start_update();
837:
838: nl_request_dump(AF_UNSPEC, RTM_GETLINK);
839: while (h = nl_get_scan())
840: if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
841: nl_parse_link(h, 1);
842: else
843: log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
844:
1.1.1.2 ! misho 845: /* Re-resolve master interface for slaves */
! 846: struct iface *i;
! 847: WALK_LIST(i, iface_list)
! 848: if (i->master_index)
! 849: {
! 850: struct iface f = {
! 851: .flags = i->flags,
! 852: .mtu = i->mtu,
! 853: .index = i->index,
! 854: .master_index = i->master_index,
! 855: .master = if_find_by_index(i->master_index)
! 856: };
! 857:
! 858: if (f.master != i->master)
! 859: {
! 860: memcpy(f.name, i->name, sizeof(f.name));
! 861: if_update(&f);
! 862: }
! 863: }
! 864:
1.1 misho 865: nl_request_dump(BIRD_AF, RTM_GETADDR);
866: while (h = nl_get_scan())
867: if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
868: nl_parse_addr(h, 1);
869: else
870: log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
871:
872: if_end_update();
873: }
874:
875: /*
876: * Routes
877: */
878:
879: static inline u32
880: krt_table_id(struct krt_proto *p)
881: {
882: return KRT_CF->sys.table_id;
883: }
884:
885: static HASH(struct krt_proto) nl_table_map;
886:
887: #define RTH_FN(k) u32_hash(k)
888: #define RTH_EQ(k1,k2) k1 == k2
889: #define RTH_KEY(p) krt_table_id(p)
890: #define RTH_NEXT(p) p->sys.hash_next
891:
892: #define RTH_REHASH rth_rehash
893: #define RTH_PARAMS /8, *2, 2, 2, 6, 20
894:
895: HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
896:
897: int
898: krt_capable(rte *e)
899: {
900: rta *a = e->attrs;
901:
902: if (a->cast != RTC_UNICAST)
903: return 0;
904:
905: switch (a->dest)
906: {
907: case RTD_ROUTER:
908: case RTD_DEVICE:
909: if (a->iface == NULL)
910: return 0;
911: case RTD_BLACKHOLE:
912: case RTD_UNREACHABLE:
913: case RTD_PROHIBIT:
914: case RTD_MULTIPATH:
915: break;
916: default:
917: return 0;
918: }
919: return 1;
920: }
921:
922: static inline int
923: nh_bufsize(struct mpnh *nh)
924: {
925: int rv = 0;
926: for (; nh != NULL; nh = nh->next)
927: rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
928: return rv;
929: }
930:
931: static int
932: nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface)
933: {
934: eattr *ea;
935: net *net = e->net;
936: rta *a = e->attrs;
937: u32 priority = 0;
938:
939: struct {
940: struct nlmsghdr h;
941: struct rtmsg r;
1.1.1.2 ! misho 942: char buf[0];
! 943: } *r;
! 944:
! 945: uint rsize = sizeof(*r) + 128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops);
! 946: r = alloca(rsize);
1.1 misho 947:
948: DBG("nl_send_route(%I/%d,op=%x)\n", net->n.prefix, net->n.pxlen, op);
949:
1.1.1.2 ! misho 950: bzero(&r->h, sizeof(r->h));
! 951: bzero(&r->r, sizeof(r->r));
! 952: r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
! 953: r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
! 954: r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
! 955:
! 956: r->r.rtm_family = BIRD_AF;
! 957: r->r.rtm_dst_len = net->n.pxlen;
! 958: r->r.rtm_protocol = RTPROT_BIRD;
! 959: r->r.rtm_scope = RT_SCOPE_NOWHERE;
! 960: nl_add_attr_ipa(&r->h, rsize, RTA_DST, net->n.prefix);
1.1 misho 961:
962: /*
963: * Strange behavior for RTM_DELROUTE:
964: * 1) rtm_family is ignored in IPv6, works for IPv4
965: * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
966: * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
967: */
968:
969: if (krt_table_id(p) < 256)
1.1.1.2 ! misho 970: r->r.rtm_table = krt_table_id(p);
1.1 misho 971: else
1.1.1.2 ! misho 972: nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
1.1 misho 973:
974: if (a->source == RTS_DUMMY)
975: priority = e->u.krt.metric;
976: else if (KRT_CF->sys.metric)
977: priority = KRT_CF->sys.metric;
978: else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
979: priority = ea->u.data;
980:
981: if (priority)
1.1.1.2 ! misho 982: nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
1.1 misho 983:
984: /* For route delete, we do not specify remaining route attributes */
985: if (op == NL_OP_DELETE)
986: goto dest;
987:
988: /* Default scope is LINK for device routes, UNIVERSE otherwise */
989: if (ea = ea_find(eattrs, EA_KRT_SCOPE))
1.1.1.2 ! misho 990: r->r.rtm_scope = ea->u.data;
1.1 misho 991: else
1.1.1.2 ! misho 992: r->r.rtm_scope = (dest == RTD_DEVICE) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
1.1 misho 993:
994: if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
1.1.1.2 ! misho 995: nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
1.1 misho 996:
997: if (ea = ea_find(eattrs, EA_KRT_REALM))
1.1.1.2 ! misho 998: nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
1.1 misho 999:
1000:
1001: u32 metrics[KRT_METRICS_MAX];
1002: metrics[0] = 0;
1003:
1004: struct ea_walk_state ews = { .eattrs = eattrs };
1005: while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1006: {
1007: int id = ea->id - EA_KRT_METRICS;
1008: metrics[0] |= 1 << id;
1009: metrics[id] = ea->u.data;
1010: }
1011:
1012: if (metrics[0])
1.1.1.2 ! misho 1013: nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
1.1 misho 1014:
1015:
1016: dest:
1017: /* a->iface != NULL checked in krt_capable() for router and device routes */
1018: switch (dest)
1019: {
1020: case RTD_ROUTER:
1.1.1.2 ! misho 1021: r->r.rtm_type = RTN_UNICAST;
! 1022: nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index);
! 1023: nl_add_attr_ipa(&r->h, rsize, RTA_GATEWAY, gw);
1.1 misho 1024: break;
1025: case RTD_DEVICE:
1.1.1.2 ! misho 1026: r->r.rtm_type = RTN_UNICAST;
! 1027: nl_add_attr_u32(&r->h, rsize, RTA_OIF, iface->index);
1.1 misho 1028: break;
1029: case RTD_BLACKHOLE:
1.1.1.2 ! misho 1030: r->r.rtm_type = RTN_BLACKHOLE;
1.1 misho 1031: break;
1032: case RTD_UNREACHABLE:
1.1.1.2 ! misho 1033: r->r.rtm_type = RTN_UNREACHABLE;
1.1 misho 1034: break;
1035: case RTD_PROHIBIT:
1.1.1.2 ! misho 1036: r->r.rtm_type = RTN_PROHIBIT;
1.1 misho 1037: break;
1038: case RTD_MULTIPATH:
1.1.1.2 ! misho 1039: r->r.rtm_type = RTN_UNICAST;
! 1040: nl_add_multipath(&r->h, rsize, a->nexthops);
1.1 misho 1041: break;
1042: case RTD_NONE:
1043: break;
1044: default:
1045: bug("krt_capable inconsistent with nl_send_route");
1046: }
1047:
1048: /* Ignore missing for DELETE */
1.1.1.2 ! misho 1049: return nl_exchange(&r->h, (op == NL_OP_DELETE));
1.1 misho 1050: }
1051:
1052: static inline int
1053: nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1054: {
1055: rta *a = e->attrs;
1056: int err = 0;
1057:
1058: if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH))
1059: {
1060: struct mpnh *nh = a->nexthops;
1061:
1062: err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface);
1063: if (err < 0)
1064: return err;
1065:
1066: for (nh = nh->next; nh; nh = nh->next)
1067: err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface);
1068:
1069: return err;
1070: }
1071:
1072: return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface);
1073: }
1074:
1075: static inline int
1076: nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
1077: {
1078: int err = 0;
1079:
1080: /* For IPv6, we just repeatedly request DELETE until we get error */
1081: do
1082: err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL);
1083: while (krt_ecmp6(p) && !err);
1084:
1085: return err;
1086: }
1087:
1088: void
1089: krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)
1090: {
1091: int err = 0;
1092:
1093: /*
1094: * We could use NL_OP_REPLACE, but route replace on Linux has some problems:
1095: *
1096: * 1) Does not check for matching rtm_protocol
1097: * 2) Has broken semantics for IPv6 ECMP
1098: * 3) Crashes some kernel version when used for IPv6 ECMP
1099: *
1100: * So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
1101: * route value, so we do not try to optimize IPv6 ECMP reconfigurations.
1102: */
1103:
1104: if (old)
1105: nl_delete_rte(p, old, eattrs);
1106:
1107: if (new)
1108: err = nl_add_rte(p, new, eattrs);
1109:
1110: if (err < 0)
1111: n->n.flags |= KRF_SYNC_ERROR;
1112: else
1113: n->n.flags &= ~KRF_SYNC_ERROR;
1114: }
1115:
1116:
1117: static inline struct mpnh *
1118: nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
1119: {
1120: struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh));
1121:
1122: nh->gw = gw;
1123: nh->iface = iface;
1124: nh->next = NULL;
1125: nh->weight = weight;
1126:
1127: return nh;
1128: }
1129:
1130: static int
1131: nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
1132: {
1133: /* Route merging must be active */
1134: if (!s->merge)
1135: return 0;
1136:
1137: /* Saved and new route must have same network, proto/table, and priority */
1138: if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1139: return 0;
1140:
1141: /* Both must be regular unicast routes */
1142: if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1143: return 0;
1144:
1145: return 1;
1146: }
1147:
1148: static void
1149: nl_announce_route(struct nl_parse_state *s)
1150: {
1151: rte *e = rte_get_temp(s->attrs);
1152: e->net = s->net;
1153: e->u.krt.src = s->krt_src;
1154: e->u.krt.proto = s->krt_proto;
1155: e->u.krt.seen = 0;
1156: e->u.krt.best = 0;
1157: e->u.krt.metric = s->krt_metric;
1158:
1159: if (s->scan)
1160: krt_got_route(s->proto, e);
1161: else
1162: krt_got_route_async(s->proto, e, s->new);
1163:
1164: s->net = NULL;
1165: s->attrs = NULL;
1166: s->proto = NULL;
1167: lp_flush(s->pool);
1168: }
1169:
1170: static inline void
1171: nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
1172: {
1173: memset(s, 0, sizeof (struct nl_parse_state));
1174: s->pool = nl_linpool;
1175: s->scan = scan;
1176: s->merge = merge;
1177: }
1178:
1179: static inline void
1180: nl_parse_end(struct nl_parse_state *s)
1181: {
1182: if (s->net)
1183: nl_announce_route(s);
1184: }
1185:
1186:
/* Emit a debug message explaining why a route is ignored and return from the enclosing void function. */
#define SKIP(...) do { DBG("KRT: Ignoring route - " __VA_ARGS__); return; } while (0)
1188:
1189: static void
1190: nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
1191: {
1192: struct krt_proto *p;
1193: struct rtmsg *i;
1194: struct rtattr *a[BIRD_RTA_MAX];
1195: int new = h->nlmsg_type == RTM_NEWROUTE;
1196:
1197: ip_addr dst = IPA_NONE;
1198: u32 oif = ~0;
1199: u32 table;
1200: u32 priority = 0;
1201: u32 def_scope = RT_SCOPE_UNIVERSE;
1202: int src;
1203:
1204: if (!(i = nl_checkin(h, sizeof(*i))))
1205: return;
1206:
1207: switch (i->rtm_family)
1208: {
1209: #ifndef IPV6
1210: case AF_INET:
1211: if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1212: return;
1213: break;
1214: #else
1215: case AF_INET6:
1216: if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1217: return;
1218: break;
1219: #endif
1220: default:
1221: return;
1222: }
1223:
1224: if (a[RTA_DST])
1225: {
1226: memcpy(&dst, RTA_DATA(a[RTA_DST]), sizeof(dst));
1227: ipa_ntoh(dst);
1228: }
1229:
1230: if (a[RTA_OIF])
1231: oif = rta_get_u32(a[RTA_OIF]);
1232:
1233: if (a[RTA_TABLE])
1234: table = rta_get_u32(a[RTA_TABLE]);
1235: else
1236: table = i->rtm_table;
1237:
1238: p = HASH_FIND(nl_table_map, RTH, table); /* Do we know this table? */
1239: DBG("KRT: Got %I/%d, type=%d, oif=%d, table=%d, prid=%d, proto=%s\n", dst, i->rtm_dst_len, i->rtm_type, oif, table, i->rtm_protocol, p ? p->p.name : "(none)");
1240: if (!p)
1241: SKIP("unknown table %d\n", table);
1242:
1243: #ifdef IPV6
1244: if (a[RTA_IIF])
1245: SKIP("IIF set\n");
1246: #else
1247: if (i->rtm_tos != 0) /* We don't support TOS */
1248: SKIP("TOS %02x\n", i->rtm_tos);
1249: #endif
1250:
1251: if (s->scan && !new)
1252: SKIP("RTM_DELROUTE in scan\n");
1253:
1254: if (a[RTA_PRIORITY])
1255: priority = rta_get_u32(a[RTA_PRIORITY]);
1256:
1257: int c = ipa_classify_net(dst);
1258: if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1259: SKIP("strange class/scope\n");
1260:
1261: switch (i->rtm_protocol)
1262: {
1263: case RTPROT_UNSPEC:
1264: SKIP("proto unspec\n");
1265:
1266: case RTPROT_REDIRECT:
1267: src = KRT_SRC_REDIRECT;
1268: break;
1269:
1270: case RTPROT_KERNEL:
1271: src = KRT_SRC_KERNEL;
1272: return;
1273:
1274: case RTPROT_BIRD:
1275: if (!s->scan)
1276: SKIP("echo\n");
1277: src = KRT_SRC_BIRD;
1278: break;
1279:
1280: case RTPROT_BOOT:
1281: default:
1282: src = KRT_SRC_ALIEN;
1283: }
1284:
1285: net *net = net_get(p->p.table, dst, i->rtm_dst_len);
1286:
1287: if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
1288: nl_announce_route(s);
1289:
1290: rta *ra = lp_allocz(s->pool, sizeof(rta));
1291: ra->src = p->p.main_source;
1292: ra->source = RTS_INHERIT;
1293: ra->scope = SCOPE_UNIVERSE;
1294: ra->cast = RTC_UNICAST;
1295:
1296: switch (i->rtm_type)
1297: {
1298: case RTN_UNICAST:
1299:
1.1.1.2 ! misho 1300: if (a[RTA_MULTIPATH])
1.1 misho 1301: {
1302: ra->dest = RTD_MULTIPATH;
1.1.1.2 ! misho 1303: ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH], i->rtm_family);
1.1 misho 1304: if (!ra->nexthops)
1305: {
1306: log(L_ERR "KRT: Received strange multipath route %I/%d",
1307: net->n.prefix, net->n.pxlen);
1308: return;
1309: }
1310:
1311: break;
1312: }
1313:
1314: ra->iface = if_find_by_index(oif);
1315: if (!ra->iface)
1316: {
1317: log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
1318: net->n.prefix, net->n.pxlen, oif);
1319: return;
1320: }
1321:
1322: if (a[RTA_GATEWAY])
1323: {
1324: neighbor *ng;
1325: ra->dest = RTD_ROUTER;
1326: memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));
1327: ipa_ntoh(ra->gw);
1328:
1329: #ifdef IPV6
1330: /* Silently skip strange 6to4 routes */
1331: if (ipa_in_net(ra->gw, IPA_NONE, 96))
1332: return;
1333: #endif
1334:
1335: ng = neigh_find2(&p->p, &ra->gw, ra->iface,
1336: (i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
1337: if (!ng || (ng->scope == SCOPE_HOST))
1338: {
1339: log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
1340: net->n.prefix, net->n.pxlen, ra->gw);
1341: return;
1342: }
1343: }
1344: else
1345: {
1346: ra->dest = RTD_DEVICE;
1347: def_scope = RT_SCOPE_LINK;
1348: }
1349:
1350: break;
1351: case RTN_BLACKHOLE:
1352: ra->dest = RTD_BLACKHOLE;
1353: break;
1354: case RTN_UNREACHABLE:
1355: ra->dest = RTD_UNREACHABLE;
1356: break;
1357: case RTN_PROHIBIT:
1358: ra->dest = RTD_PROHIBIT;
1359: break;
1360: /* FIXME: What about RTN_THROW? */
1361: default:
1362: SKIP("type %d\n", i->rtm_type);
1363: return;
1364: }
1365:
1366: if (i->rtm_scope != def_scope)
1367: {
1368: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1369: ea->next = ra->eattrs;
1370: ra->eattrs = ea;
1371: ea->flags = EALF_SORTED;
1372: ea->count = 1;
1373: ea->attrs[0].id = EA_KRT_SCOPE;
1374: ea->attrs[0].flags = 0;
1375: ea->attrs[0].type = EAF_TYPE_INT;
1376: ea->attrs[0].u.data = i->rtm_scope;
1377: }
1378:
1379: if (a[RTA_PREFSRC])
1380: {
1381: ip_addr ps;
1382: memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
1383: ipa_ntoh(ps);
1384:
1385: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1386: ea->next = ra->eattrs;
1387: ra->eattrs = ea;
1388: ea->flags = EALF_SORTED;
1389: ea->count = 1;
1390: ea->attrs[0].id = EA_KRT_PREFSRC;
1391: ea->attrs[0].flags = 0;
1392: ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
1393: ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1394: ea->attrs[0].u.ptr->length = sizeof(ps);
1395: memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
1396: }
1397:
1398: if (a[RTA_FLOW])
1399: {
1400: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1401: ea->next = ra->eattrs;
1402: ra->eattrs = ea;
1403: ea->flags = EALF_SORTED;
1404: ea->count = 1;
1405: ea->attrs[0].id = EA_KRT_REALM;
1406: ea->attrs[0].flags = 0;
1407: ea->attrs[0].type = EAF_TYPE_INT;
1408: ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
1409: }
1410:
1411: if (a[RTA_METRICS])
1412: {
1413: u32 metrics[KRT_METRICS_MAX];
1414: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
1415: int t, n = 0;
1416:
1417: if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1418: {
1419: log(L_ERR "KRT: Received route %I/%d with strange RTA_METRICS attribute",
1420: net->n.prefix, net->n.pxlen);
1421: return;
1422: }
1423:
1424: for (t = 1; t < KRT_METRICS_MAX; t++)
1425: if (metrics[0] & (1 << t))
1426: {
1427: ea->attrs[n].id = EA_CODE(EAP_KRT, KRT_METRICS_OFFSET + t);
1428: ea->attrs[n].flags = 0;
1429: ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1430: ea->attrs[n].u.data = metrics[t];
1431: n++;
1432: }
1433:
1434: if (n > 0)
1435: {
1436: ea->next = ra->eattrs;
1437: ea->flags = EALF_SORTED;
1438: ea->count = n;
1439: ra->eattrs = ea;
1440: }
1441: }
1442:
1443: /*
1444: * Ideally, now we would send the received route to the rest of kernel code.
1.1.1.2 ! misho 1445: * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
! 1446: * postpone it and merge next hops until the end of the sequence. Note that
! 1447: * proper multipath updates are rejected by nl_mergable_route(), so it is
! 1448: * always the first case for them.
1.1 misho 1449: */
1450:
1451: if (!s->net)
1452: {
1453: /* Store the new route */
1454: s->net = net;
1455: s->attrs = ra;
1456: s->proto = p;
1457: s->new = new;
1458: s->krt_src = src;
1459: s->krt_type = i->rtm_type;
1460: s->krt_proto = i->rtm_protocol;
1461: s->krt_metric = priority;
1462: }
1463: else
1464: {
1465: /* Merge next hops with the stored route */
1466: rta *a = s->attrs;
1467:
1468: if (a->dest != RTD_MULTIPATH)
1469: {
1470: a->dest = RTD_MULTIPATH;
1471: a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0);
1472: }
1473:
1474: mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0));
1475: }
1476: }
1477:
1478: void
1479: krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1480: {
1481: struct nlmsghdr *h;
1482: struct nl_parse_state s;
1483:
1484: nl_parse_begin(&s, 1, krt_ecmp6(p));
1485:
1486: nl_request_dump(BIRD_AF, RTM_GETROUTE);
1487: while (h = nl_get_scan())
1488: if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1489: nl_parse_route(&s, h);
1490: else
1491: log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1492:
1493: nl_parse_end(&s);
1494: }
1495:
1496: /*
1497: * Asynchronous Netlink interface
1498: */
1499:
1500: static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1501: static byte *nl_async_rx_buffer; /* Receive buffer */
1502:
1503: static void
1504: nl_async_msg(struct nlmsghdr *h)
1505: {
1506: struct nl_parse_state s;
1507:
1508: switch (h->nlmsg_type)
1509: {
1510: case RTM_NEWROUTE:
1511: case RTM_DELROUTE:
1512: DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1513: nl_parse_begin(&s, 0, 0);
1514: nl_parse_route(&s, h);
1515: nl_parse_end(&s);
1516: break;
1517: case RTM_NEWLINK:
1518: case RTM_DELLINK:
1519: DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1520: if (kif_proto)
1521: nl_parse_link(h, 0);
1522: break;
1523: case RTM_NEWADDR:
1524: case RTM_DELADDR:
1525: DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1526: if (kif_proto)
1527: nl_parse_addr(h, 0);
1528: break;
1529: default:
1530: DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1531: }
1532: }
1533:
1534: static int
1535: nl_async_hook(sock *sk, uint size UNUSED)
1536: {
1537: struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1538: struct sockaddr_nl sa;
1539: struct msghdr m = {
1540: .msg_name = &sa,
1541: .msg_namelen = sizeof(sa),
1542: .msg_iov = &iov,
1543: .msg_iovlen = 1,
1544: };
1545: struct nlmsghdr *h;
1546: int x;
1547: uint len;
1548:
1549: x = recvmsg(sk->fd, &m, 0);
1550: if (x < 0)
1551: {
1552: if (errno == ENOBUFS)
1553: {
1554: /*
1555: * Netlink reports some packets have been thrown away.
1556: * One day we might react to it by asking for route table
1557: * scan in near future.
1558: */
1559: log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
1560: return 1; /* More data are likely to be ready */
1561: }
1562: else if (errno != EWOULDBLOCK)
1563: log(L_ERR "Netlink recvmsg: %m");
1564: return 0;
1565: }
1566: if (sa.nl_pid) /* It isn't from the kernel */
1567: {
1568: DBG("Non-kernel packet\n");
1569: return 1;
1570: }
1571: h = (void *) nl_async_rx_buffer;
1572: len = x;
1573: if (m.msg_flags & MSG_TRUNC)
1574: {
1575: log(L_WARN "Netlink got truncated asynchronous message");
1576: return 1;
1577: }
1578: while (NLMSG_OK(h, len))
1579: {
1580: nl_async_msg(h);
1581: h = NLMSG_NEXT(h, len);
1582: }
1583: if (len)
1584: log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1585: return 1;
1586: }
1587:
1588: static void
1589: nl_async_err_hook(sock *sk, int e UNUSED)
1590: {
1591: nl_async_hook(sk, 0);
1592: }
1593:
1594: static void
1595: nl_open_async(void)
1596: {
1597: sock *sk;
1598: struct sockaddr_nl sa;
1599: int fd;
1600:
1601: if (nl_async_sk)
1602: return;
1603:
1604: DBG("KRT: Opening async netlink socket\n");
1605:
1606: fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1607: if (fd < 0)
1608: {
1609: log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1610: return;
1611: }
1612:
1613: bzero(&sa, sizeof(sa));
1614: sa.nl_family = AF_NETLINK;
1615: #ifdef IPV6
1616: sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1617: #else
1618: sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
1619: #endif
1620: if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1621: {
1622: log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
1623: close(fd);
1624: return;
1625: }
1626:
1627: nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1628:
1629: sk = nl_async_sk = sk_new(krt_pool);
1630: sk->type = SK_MAGIC;
1631: sk->rx_hook = nl_async_hook;
1632: sk->err_hook = nl_async_err_hook;
1633: sk->fd = fd;
1634: if (sk_open(sk) < 0)
1635: bug("Netlink: sk_open failed");
1636: }
1637:
1638:
1639: /*
1640: * Interface to the UNIX krt module
1641: */
1642:
1643: void
1644: krt_sys_io_init(void)
1645: {
1646: nl_linpool = lp_new(krt_pool, 4080);
1647: HASH_INIT(nl_table_map, krt_pool, 6);
1648: }
1649:
1650: int
1651: krt_sys_start(struct krt_proto *p)
1652: {
1653: struct krt_proto *old = HASH_FIND(nl_table_map, RTH, krt_table_id(p));
1654:
1655: if (old)
1656: {
1657: log(L_ERR "%s: Kernel table %u already registered by %s",
1658: p->p.name, krt_table_id(p), old->p.name);
1659: return 0;
1660: }
1661:
1662: HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
1663:
1664: nl_open();
1665: nl_open_async();
1666:
1667: return 1;
1668: }
1669:
1670: void
1671: krt_sys_shutdown(struct krt_proto *p)
1672: {
1673: HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
1674: }
1675:
1676: int
1677: krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
1678: {
1679: return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
1680: }
1681:
1682: void
1683: krt_sys_init_config(struct krt_config *cf)
1684: {
1685: cf->sys.table_id = RT_TABLE_MAIN;
1686: cf->sys.metric = 0;
1687: }
1688:
1689: void
1690: krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
1691: {
1692: d->sys.table_id = s->sys.table_id;
1693: d->sys.metric = s->sys.metric;
1694: }
1695:
1696: static const char *krt_metrics_names[KRT_METRICS_MAX] = {
1697: NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
1698: "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
1699: };
1700:
1701: static const char *krt_features_names[KRT_FEATURES_MAX] = {
1702: "ecn", NULL, NULL, "allfrag"
1703: };
1704:
1705: int
1706: krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
1707: {
1708: switch (a->id)
1709: {
1710: case EA_KRT_PREFSRC:
1711: bsprintf(buf, "prefsrc");
1712: return GA_NAME;
1713:
1714: case EA_KRT_REALM:
1715: bsprintf(buf, "realm");
1716: return GA_NAME;
1717:
1718: case EA_KRT_SCOPE:
1719: bsprintf(buf, "scope");
1720: return GA_NAME;
1721:
1722: case EA_KRT_LOCK:
1723: buf += bsprintf(buf, "lock:");
1724: ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
1725: return GA_FULL;
1726:
1727: case EA_KRT_FEATURES:
1728: buf += bsprintf(buf, "features:");
1729: ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
1730: return GA_FULL;
1731:
1732: default:;
1733: int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
1734: if (id > 0 && id < KRT_METRICS_MAX)
1735: {
1736: bsprintf(buf, "%s", krt_metrics_names[id]);
1737: return GA_NAME;
1738: }
1739:
1740: return GA_UNKNOWN;
1741: }
1742: }
1743:
1744:
1745:
1746: void
1747: kif_sys_start(struct kif_proto *p UNUSED)
1748: {
1749: nl_open();
1750: nl_open_async();
1751: }
1752:
1753: void
1754: kif_sys_shutdown(struct kif_proto *p UNUSED)
1755: {
1.1.1.2 ! misho 1756: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>