Annotation of embedaddon/bird2/sysdep/linux/netlink.c, revision 1.1.1.1
1.1 misho 1: /*
2: * BIRD -- Linux Netlink Interface
3: *
4: * (c) 1999--2000 Martin Mares <mj@ucw.cz>
5: *
6: * Can be freely distributed and used under the terms of the GNU GPL.
7: */
8:
9: #include <alloca.h>
10: #include <stdio.h>
11: #include <unistd.h>
12: #include <fcntl.h>
13: #include <sys/socket.h>
14: #include <sys/uio.h>
15: #include <errno.h>
16:
17: #undef LOCAL_DEBUG
18:
19: #include "nest/bird.h"
20: #include "nest/route.h"
21: #include "nest/protocol.h"
22: #include "nest/iface.h"
23: #include "lib/alloca.h"
24: #include "sysdep/unix/unix.h"
25: #include "sysdep/unix/krt.h"
26: #include "lib/socket.h"
27: #include "lib/string.h"
28: #include "lib/hash.h"
29: #include "conf/conf.h"
30:
31: #include <asm/types.h>
32: #include <linux/if.h>
33: #include <linux/netlink.h>
34: #include <linux/rtnetlink.h>
35:
36: #ifdef HAVE_MPLS_KERNEL
37: #include <linux/lwtunnel.h>
38: #endif
39:
/*
 * Fallback definitions for constants that may be missing from older libc or
 * kernel headers. Each one is supplied only when the system headers did not
 * already define it.
 */

#if !defined(MSG_TRUNC)		/* Hack: Several versions of glibc miss this one :( */
#define MSG_TRUNC 0x20
#endif

#if !defined(IFA_FLAGS)
#define IFA_FLAGS 8
#endif

#if !defined(IFF_LOWER_UP)
#define IFF_LOWER_UP 0x10000
#endif

#if !defined(RTA_TABLE)
#define RTA_TABLE 15
#endif

#if !defined(RTA_VIA)
#define RTA_VIA 18
#endif

#if !defined(RTA_NEWDST)
#define RTA_NEWDST 19
#endif

#if !defined(RTA_ENCAP_TYPE)
#define RTA_ENCAP_TYPE 21
#endif

#if !defined(RTA_ENCAP)
#define RTA_ENCAP 22
#endif
71:
/* Address family tests; (p) is any object with an `af' member */
#define krt_ipv4(p) ((p)->af == AF_INET)
#define krt_ecmp6(p) ((p)->af == AF_INET6)

/* NOTE(review): presumably the default ECMP limit; it is consumed outside this file section */
const int rt_default_ecmp = 16;
76:
/*
 * Structure nl_parse_state keeps state of received route processing. Ideally,
 * we could just independently parse received Netlink messages and immediately
 * propagate received routes to the rest of BIRD, but older Linux kernels (before
 * version 4.11) represent and announce IPv6 ECMP routes not as one route with
 * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of
 * routes with the same prefix. More recent kernels work as with IPv4.
 *
 * Therefore, BIRD keeps the currently processed route in the nl_parse_state
 * structure and postpones its propagation until we expect it to be final; i.e.,
 * when a non-matching route is received or when the scan ends. When another
 * matching route is received, it is merged with the already processed route to
 * form an ECMP route. Note that merging is done only for IPv6 (merge == 1), but
 * the postponing is done in both cases (for simplicity). All IPv4 routes or IPv6
 * routes with RTA_MULTIPATH set are just considered non-matching.
 *
 * This is ignored for asynchronous notifications (every notification is handled
 * as a separate route). It is not an issue for our routes, as we ignore such
 * notifications anyway. But importing alien IPv6 ECMP routes does not work
 * properly with older kernels.
 *
 * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes
 * for the same prefix.
 */
101:
102: struct nl_parse_state
103: {
104: struct linpool *pool;
105: int scan;
106: int merge;
107:
108: net *net;
109: rta *attrs;
110: struct krt_proto *proto;
111: s8 new;
112: s8 krt_src;
113: u8 krt_type;
114: u8 krt_proto;
115: u32 krt_metric;
116: };
117:
118: /*
119: * Synchronous Netlink interface
120: */
121:
122: struct nl_sock
123: {
124: int fd;
125: u32 seq;
126: byte *rx_buffer; /* Receive buffer */
127: struct nlmsghdr *last_hdr; /* Recently received packet */
128: uint last_size;
129: };
130:
131: #define NL_RX_SIZE 8192
132:
133: #define NL_OP_DELETE 0
134: #define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
135: #define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
136: #define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
137:
138: static linpool *nl_linpool;
139:
140: static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
141: static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
142:
143: static void
144: nl_open_sock(struct nl_sock *nl)
145: {
146: if (nl->fd < 0)
147: {
148: nl->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
149: if (nl->fd < 0)
150: die("Unable to open rtnetlink socket: %m");
151: nl->seq = (u32) (current_time() TO_S); /* Or perhaps random_u32() ? */
152: nl->rx_buffer = xmalloc(NL_RX_SIZE);
153: nl->last_hdr = NULL;
154: nl->last_size = 0;
155: }
156: }
157:
158: static void
159: nl_open(void)
160: {
161: nl_open_sock(&nl_scan);
162: nl_open_sock(&nl_req);
163: }
164:
165: static void
166: nl_send(struct nl_sock *nl, struct nlmsghdr *nh)
167: {
168: struct sockaddr_nl sa;
169:
170: memset(&sa, 0, sizeof(sa));
171: sa.nl_family = AF_NETLINK;
172: nh->nlmsg_pid = 0;
173: nh->nlmsg_seq = ++(nl->seq);
174: if (sendto(nl->fd, nh, nh->nlmsg_len, 0, (struct sockaddr *)&sa, sizeof(sa)) < 0)
175: die("rtnetlink sendto: %m");
176: nl->last_hdr = NULL;
177: }
178:
179: static void
180: nl_request_dump(int af, int cmd)
181: {
182: struct {
183: struct nlmsghdr nh;
184: struct rtgenmsg g;
185: } req = {
186: .nh.nlmsg_type = cmd,
187: .nh.nlmsg_len = sizeof(req),
188: .nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
189: .g.rtgen_family = af
190: };
191: nl_send(&nl_scan, &req.nh);
192: }
193:
194: static struct nlmsghdr *
195: nl_get_reply(struct nl_sock *nl)
196: {
197: for(;;)
198: {
199: if (!nl->last_hdr)
200: {
201: struct iovec iov = { nl->rx_buffer, NL_RX_SIZE };
202: struct sockaddr_nl sa;
203: struct msghdr m = {
204: .msg_name = &sa,
205: .msg_namelen = sizeof(sa),
206: .msg_iov = &iov,
207: .msg_iovlen = 1,
208: };
209: int x = recvmsg(nl->fd, &m, 0);
210: if (x < 0)
211: die("nl_get_reply: %m");
212: if (sa.nl_pid) /* It isn't from the kernel */
213: {
214: DBG("Non-kernel packet\n");
215: continue;
216: }
217: nl->last_size = x;
218: nl->last_hdr = (void *) nl->rx_buffer;
219: if (m.msg_flags & MSG_TRUNC)
220: bug("nl_get_reply: got truncated reply which should be impossible");
221: }
222: if (NLMSG_OK(nl->last_hdr, nl->last_size))
223: {
224: struct nlmsghdr *h = nl->last_hdr;
225: nl->last_hdr = NLMSG_NEXT(h, nl->last_size);
226: if (h->nlmsg_seq != nl->seq)
227: {
228: log(L_WARN "nl_get_reply: Ignoring out of sequence netlink packet (%x != %x)",
229: h->nlmsg_seq, nl->seq);
230: continue;
231: }
232: return h;
233: }
234: if (nl->last_size)
235: log(L_WARN "nl_get_reply: Found packet remnant of size %d", nl->last_size);
236: nl->last_hdr = NULL;
237: }
238: }
239:
240: static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
241:
242: static int
243: nl_error(struct nlmsghdr *h, int ignore_esrch)
244: {
245: struct nlmsgerr *e;
246: int ec;
247:
248: if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr)))
249: {
250: log(L_WARN "Netlink: Truncated error message received");
251: return ENOBUFS;
252: }
253: e = (struct nlmsgerr *) NLMSG_DATA(h);
254: ec = -e->error;
255: if (ec && !(ignore_esrch && (ec == ESRCH)))
256: log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
257: return ec;
258: }
259:
260: static struct nlmsghdr *
261: nl_get_scan(void)
262: {
263: struct nlmsghdr *h = nl_get_reply(&nl_scan);
264:
265: if (h->nlmsg_type == NLMSG_DONE)
266: return NULL;
267: if (h->nlmsg_type == NLMSG_ERROR)
268: {
269: nl_error(h, 0);
270: return NULL;
271: }
272: return h;
273: }
274:
275: static int
276: nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
277: {
278: struct nlmsghdr *h;
279:
280: nl_send(&nl_req, pkt);
281: for(;;)
282: {
283: h = nl_get_reply(&nl_req);
284: if (h->nlmsg_type == NLMSG_ERROR)
285: break;
286: log(L_WARN "nl_exchange: Unexpected reply received");
287: }
288: return nl_error(h, ignore_esrch) ? -1 : 0;
289: }
290:
291: /*
292: * Netlink attributes
293: */
294:
295: static int nl_attr_len;
296:
297: static void *
298: nl_checkin(struct nlmsghdr *h, int lsize)
299: {
300: nl_attr_len = h->nlmsg_len - NLMSG_LENGTH(lsize);
301: if (nl_attr_len < 0)
302: {
303: log(L_ERR "nl_checkin: underrun by %d bytes", -nl_attr_len);
304: return NULL;
305: }
306: return NLMSG_DATA(h);
307: }
308:
309: struct nl_want_attrs {
310: u8 defined:1;
311: u8 checksize:1;
312: u8 size;
313: };
314:
315:
316: #define BIRD_IFLA_MAX (IFLA_WIRELESS+1)
317:
318: static struct nl_want_attrs ifla_attr_want[BIRD_IFLA_MAX] = {
319: [IFLA_IFNAME] = { 1, 0, 0 },
320: [IFLA_MTU] = { 1, 1, sizeof(u32) },
321: [IFLA_MASTER] = { 1, 1, sizeof(u32) },
322: [IFLA_WIRELESS] = { 1, 0, 0 },
323: };
324:
325:
326: #define BIRD_IFA_MAX (IFA_FLAGS+1)
327:
328: static struct nl_want_attrs ifa_attr_want4[BIRD_IFA_MAX] = {
329: [IFA_ADDRESS] = { 1, 1, sizeof(ip4_addr) },
330: [IFA_LOCAL] = { 1, 1, sizeof(ip4_addr) },
331: [IFA_BROADCAST] = { 1, 1, sizeof(ip4_addr) },
332: [IFA_FLAGS] = { 1, 1, sizeof(u32) },
333: };
334:
335: static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
336: [IFA_ADDRESS] = { 1, 1, sizeof(ip6_addr) },
337: [IFA_LOCAL] = { 1, 1, sizeof(ip6_addr) },
338: [IFA_FLAGS] = { 1, 1, sizeof(u32) },
339: };
340:
341:
342: #define BIRD_RTA_MAX (RTA_ENCAP+1)
343:
344: static struct nl_want_attrs nexthop_attr_want4[BIRD_RTA_MAX] = {
345: [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
346: [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
347: [RTA_ENCAP] = { 1, 0, 0 },
348: };
349:
350: static struct nl_want_attrs nexthop_attr_want6[BIRD_RTA_MAX] = {
351: [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
352: [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
353: [RTA_ENCAP] = { 1, 0, 0 },
354: };
355:
#ifdef HAVE_MPLS_KERNEL
/* Attributes accepted inside an MPLS RTA_ENCAP nest: only the label stack */
static struct nl_want_attrs encap_mpls_want[BIRD_RTA_MAX] = {
  [RTA_DST] = { .defined = 1 },
};
#endif
361:
362: static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
363: [RTA_DST] = { 1, 1, sizeof(ip4_addr) },
364: [RTA_OIF] = { 1, 1, sizeof(u32) },
365: [RTA_GATEWAY] = { 1, 1, sizeof(ip4_addr) },
366: [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
367: [RTA_PREFSRC] = { 1, 1, sizeof(ip4_addr) },
368: [RTA_METRICS] = { 1, 0, 0 },
369: [RTA_MULTIPATH] = { 1, 0, 0 },
370: [RTA_FLOW] = { 1, 1, sizeof(u32) },
371: [RTA_TABLE] = { 1, 1, sizeof(u32) },
372: [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
373: [RTA_ENCAP] = { 1, 0, 0 },
374: };
375:
376: static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
377: [RTA_DST] = { 1, 1, sizeof(ip6_addr) },
378: [RTA_SRC] = { 1, 1, sizeof(ip6_addr) },
379: [RTA_IIF] = { 1, 1, sizeof(u32) },
380: [RTA_OIF] = { 1, 1, sizeof(u32) },
381: [RTA_GATEWAY] = { 1, 1, sizeof(ip6_addr) },
382: [RTA_PRIORITY] = { 1, 1, sizeof(u32) },
383: [RTA_PREFSRC] = { 1, 1, sizeof(ip6_addr) },
384: [RTA_METRICS] = { 1, 0, 0 },
385: [RTA_MULTIPATH] = { 1, 0, 0 },
386: [RTA_FLOW] = { 1, 1, sizeof(u32) },
387: [RTA_TABLE] = { 1, 1, sizeof(u32) },
388: [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
389: [RTA_ENCAP] = { 1, 0, 0 },
390: };
391:
#ifdef HAVE_MPLS_KERNEL
/* Route (RTM_*ROUTE) attributes we care about, MPLS flavour */
static struct nl_want_attrs rtm_attr_want_mpls[BIRD_RTA_MAX] = {
  [RTA_DST]      = { .defined = 1, .checksize = 1, .size = sizeof(u32) },
  [RTA_IIF]      = { .defined = 1, .checksize = 1, .size = sizeof(u32) },
  [RTA_OIF]      = { .defined = 1, .checksize = 1, .size = sizeof(u32) },
  [RTA_PRIORITY] = { .defined = 1, .checksize = 1, .size = sizeof(u32) },
  [RTA_METRICS]  = { .defined = 1 },
  [RTA_FLOW]     = { .defined = 1, .checksize = 1, .size = sizeof(u32) },
  [RTA_TABLE]    = { .defined = 1, .checksize = 1, .size = sizeof(u32) },
  [RTA_VIA]      = { .defined = 1 },
  [RTA_NEWDST]   = { .defined = 1 },
};
#endif
405:
406:
407: static int
408: nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k, int ksize)
409: {
410: int max = ksize / sizeof(struct rtattr *);
411: bzero(k, ksize);
412:
413: for ( ; RTA_OK(a, nl_attr_len); a = RTA_NEXT(a, nl_attr_len))
414: {
415: if ((a->rta_type >= max) || !want[a->rta_type].defined)
416: continue;
417:
418: if (want[a->rta_type].checksize && (RTA_PAYLOAD(a) != want[a->rta_type].size))
419: {
420: log(L_ERR "nl_parse_attrs: Malformed attribute received");
421: return 0;
422: }
423:
424: k[a->rta_type] = a;
425: }
426:
427: if (nl_attr_len)
428: {
429: log(L_ERR "nl_parse_attrs: remnant of size %d", nl_attr_len);
430: return 0;
431: }
432:
433: return 1;
434: }
435:
436: static inline u16 rta_get_u16(struct rtattr *a)
437: { return *(u16 *) RTA_DATA(a); }
438:
439: static inline u32 rta_get_u32(struct rtattr *a)
440: { return *(u32 *) RTA_DATA(a); }
441:
442: static inline ip4_addr rta_get_ip4(struct rtattr *a)
443: { return ip4_ntoh(*(ip4_addr *) RTA_DATA(a)); }
444:
445: static inline ip6_addr rta_get_ip6(struct rtattr *a)
446: { return ip6_ntoh(*(ip6_addr *) RTA_DATA(a)); }
447:
448: static inline ip_addr rta_get_ipa(struct rtattr *a)
449: {
450: if (RTA_PAYLOAD(a) == sizeof(ip4_addr))
451: return ipa_from_ip4(rta_get_ip4(a));
452: else
453: return ipa_from_ip6(rta_get_ip6(a));
454: }
455:
#ifdef HAVE_MPLS_KERNEL
/*
 * Decode an RTA_VIA attribute into an IP address.
 *
 * Returns IPA_NONE when the address family is neither AF_INET nor AF_INET6,
 * or when the attribute payload is too short to hold struct rtvia followed
 * by an address of the announced family (the attribute is accepted without
 * a size check by the want tables, so the length is verified here).
 */
static inline ip_addr rta_get_via(struct rtattr *a)
{
  struct rtvia *v = RTA_DATA(a);
  uint plen = RTA_PAYLOAD(a);

  if (plen < sizeof(struct rtvia))
    return IPA_NONE;

  uint alen = plen - sizeof(struct rtvia);

  switch(v->rtvia_family) {
    case AF_INET:
      if (alen >= sizeof(ip4_addr))
	return ipa_from_ip4(ip4_ntoh(*(ip4_addr *) v->rtvia_addr));
      break;

    case AF_INET6:
      if (alen >= sizeof(ip6_addr))
	return ipa_from_ip6(ip6_ntoh(*(ip6_addr *) v->rtvia_addr));
      break;
  }

  return IPA_NONE;
}

static u32 rta_mpls_stack[MPLS_MAX_LABEL_STACK];

/*
 * Decode the MPLS label stack carried in attribute @a into @stack and return
 * the number of labels. A missing attribute (@a == NULL) yields 0. A payload
 * length that is not a multiple of four is reported and rounded down; when
 * mpls_get() reports a negative count the stack is reported as too long and
 * 0 is returned.
 */
static inline int rta_get_mpls(struct rtattr *a, u32 *stack)
{
  if (!a)
    return 0;

  /* RTA_PAYLOAD() has a size_t-based type; cast it to match the %u conversion */
  if (RTA_PAYLOAD(a) % 4)
    log(L_WARN "KRT: Strange length of received MPLS stack: %u", (uint) RTA_PAYLOAD(a));

  int labels = mpls_get(RTA_DATA(a), RTA_PAYLOAD(a) & ~0x3, stack);

  if (labels < 0)
  {
    log(L_WARN "KRT: Too long MPLS stack received, ignoring");
    labels = 0;
  }

  return labels;
}
#endif
487:
488: struct rtattr *
489: nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint dlen)
490: {
491: uint pos = NLMSG_ALIGN(h->nlmsg_len);
492: uint len = RTA_LENGTH(dlen);
493:
494: if (pos + len > bufsize)
495: bug("nl_add_attr: packet buffer overflow");
496:
497: struct rtattr *a = (struct rtattr *)((char *)h + pos);
498: a->rta_type = code;
499: a->rta_len = len;
500: h->nlmsg_len = pos + len;
501:
502: if (dlen > 0)
503: memcpy(RTA_DATA(a), data, dlen);
504:
505: return a;
506: }
507:
508: static inline struct rtattr *
509: nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
510: {
511: return nl_add_attr(h, bufsize, code, NULL, 0);
512: }
513:
514: static inline void
515: nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
516: {
517: a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
518: }
519:
520: static inline void
521: nl_add_attr_u16(struct nlmsghdr *h, uint bufsize, int code, u16 data)
522: {
523: nl_add_attr(h, bufsize, code, &data, 2);
524: }
525:
526: static inline void
527: nl_add_attr_u32(struct nlmsghdr *h, uint bufsize, int code, u32 data)
528: {
529: nl_add_attr(h, bufsize, code, &data, 4);
530: }
531:
532: static inline void
533: nl_add_attr_ip4(struct nlmsghdr *h, uint bufsize, int code, ip4_addr ip4)
534: {
535: ip4 = ip4_hton(ip4);
536: nl_add_attr(h, bufsize, code, &ip4, sizeof(ip4));
537: }
538:
539: static inline void
540: nl_add_attr_ip6(struct nlmsghdr *h, uint bufsize, int code, ip6_addr ip6)
541: {
542: ip6 = ip6_hton(ip6);
543: nl_add_attr(h, bufsize, code, &ip6, sizeof(ip6));
544: }
545:
546: static inline void
547: nl_add_attr_ipa(struct nlmsghdr *h, uint bufsize, int code, ip_addr ipa)
548: {
549: if (ipa_is_ip4(ipa))
550: nl_add_attr_ip4(h, bufsize, code, ipa_to_ip4(ipa));
551: else
552: nl_add_attr_ip6(h, bufsize, code, ipa_to_ip6(ipa));
553: }
554:
#ifdef HAVE_MPLS_KERNEL
/* Append attribute @code carrying @len MPLS labels from @stack, encoded by mpls_put() */
static inline void
nl_add_attr_mpls(struct nlmsghdr *h, uint bufsize, int code, int len, u32 *stack)
{
  char wire[len*4];	/* Four bytes per label */

  mpls_put(wire, len, stack);
  nl_add_attr(h, bufsize, code, wire, len*4);
}

/*
 * Append an MPLS encapsulation: RTA_ENCAP_TYPE set to LWTUNNEL_ENCAP_MPLS,
 * followed by an RTA_ENCAP nest holding the label stack as RTA_DST.
 */
static inline void
nl_add_attr_mpls_encap(struct nlmsghdr *h, uint bufsize, int len, u32 *stack)
{
  struct rtattr *encap;

  nl_add_attr_u16(h, bufsize, RTA_ENCAP_TYPE, LWTUNNEL_ENCAP_MPLS);

  encap = nl_open_attr(h, bufsize, RTA_ENCAP);
  nl_add_attr_mpls(h, bufsize, RTA_DST, len, stack);
  nl_close_attr(h, encap);
}

/* Append an RTA_VIA attribute holding the address family and address of @ipa */
static inline void
nl_add_attr_via(struct nlmsghdr *h, uint bufsize, ip_addr ipa)
{
  /* Room for the largest (IPv6) address */
  struct rtvia *via = alloca(sizeof(struct rtvia) + 16);
  uint alen;

  if (ipa_is_ip4(ipa))
  {
    via->rtvia_family = AF_INET;
    put_ip4(via->rtvia_addr, ipa_to_ip4(ipa));
    alen = 4;
  }
  else
  {
    via->rtvia_family = AF_INET6;
    put_ip6(via->rtvia_addr, ipa_to_ip6(ipa));
    alen = 16;
  }

  nl_add_attr(h, bufsize, RTA_VIA, via, sizeof(struct rtvia) + alen);
}
#endif
593:
594: static inline struct rtnexthop *
595: nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
596: {
597: uint pos = NLMSG_ALIGN(h->nlmsg_len);
598: uint len = RTNH_LENGTH(0);
599:
600: if (pos + len > bufsize)
601: bug("nl_open_nexthop: packet buffer overflow");
602:
603: h->nlmsg_len = pos + len;
604:
605: return (void *)h + pos;
606: }
607:
608: static inline void
609: nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
610: {
611: nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
612: }
613:
614: static inline void
615: nl_add_nexthop(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af UNUSED)
616: {
617: #ifdef HAVE_MPLS_KERNEL
618: if (nh->labels > 0)
619: if (af == AF_MPLS)
620: nl_add_attr_mpls(h, bufsize, RTA_NEWDST, nh->labels, nh->label);
621: else
622: nl_add_attr_mpls_encap(h, bufsize, nh->labels, nh->label);
623:
624: if (ipa_nonzero(nh->gw))
625: if (af == AF_MPLS)
626: nl_add_attr_via(h, bufsize, nh->gw);
627: else
628: nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
629: #else
630:
631: if (ipa_nonzero(nh->gw))
632: nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
633: #endif
634: }
635:
636: static void
637: nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af)
638: {
639: struct rtattr *a = nl_open_attr(h, bufsize, RTA_MULTIPATH);
640:
641: for (; nh; nh = nh->next)
642: {
643: struct rtnexthop *rtnh = nl_open_nexthop(h, bufsize);
644:
645: rtnh->rtnh_flags = 0;
646: rtnh->rtnh_hops = nh->weight;
647: rtnh->rtnh_ifindex = nh->iface->index;
648:
649: nl_add_nexthop(h, bufsize, nh, af);
650:
651: if (nh->flags & RNF_ONLINK)
652: rtnh->rtnh_flags |= RTNH_F_ONLINK;
653:
654: nl_close_nexthop(h, rtnh);
655: }
656:
657: nl_close_attr(h, a);
658: }
659:
660: static struct nexthop *
661: nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, struct rtattr *ra, int af)
662: {
663: struct rtattr *a[BIRD_RTA_MAX];
664: struct rtnexthop *nh = RTA_DATA(ra);
665: struct nexthop *rv, *first, **last;
666: unsigned len = RTA_PAYLOAD(ra);
667:
668: first = NULL;
669: last = &first;
670:
671: while (len)
672: {
673: /* Use RTNH_OK(nh,len) ?? */
674: if ((len < sizeof(*nh)) || (len < nh->rtnh_len))
675: return NULL;
676:
677: *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE);
678: last = &(rv->next);
679:
680: rv->weight = nh->rtnh_hops;
681: rv->iface = if_find_by_index(nh->rtnh_ifindex);
682: if (!rv->iface)
683: return NULL;
684:
685: /* Nonexistent RTNH_PAYLOAD ?? */
686: nl_attr_len = nh->rtnh_len - RTNH_LENGTH(0);
687: switch (af)
688: {
689: case AF_INET:
690: if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want4, a, sizeof(a)))
691: return NULL;
692: break;
693:
694: case AF_INET6:
695: if (!nl_parse_attrs(RTNH_DATA(nh), nexthop_attr_want6, a, sizeof(a)))
696: return NULL;
697: break;
698:
699: default:
700: return NULL;
701: }
702:
703: if (a[RTA_GATEWAY])
704: {
705: rv->gw = rta_get_ipa(a[RTA_GATEWAY]);
706:
707: if (nh->rtnh_flags & RTNH_F_ONLINK)
708: rv->flags |= RNF_ONLINK;
709:
710: neighbor *nbr;
711: nbr = neigh_find(&p->p, rv->gw, rv->iface,
712: (rv->flags & RNF_ONLINK) ? NEF_ONLINK : 0);
713: if (!nbr || (nbr->scope == SCOPE_HOST))
714: return NULL;
715: }
716: else
717: rv->gw = IPA_NONE;
718:
719: #ifdef HAVE_MPLS_KERNEL
720: if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE])
721: {
722: if (rta_get_u16(a[RTA_ENCAP_TYPE]) != LWTUNNEL_ENCAP_MPLS) {
723: log(L_WARN "KRT: Unknown encapsulation method %d in multipath", rta_get_u16(a[RTA_ENCAP_TYPE]));
724: return NULL;
725: }
726:
727: struct rtattr *enca[BIRD_RTA_MAX];
728: nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
729: nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
730: rv->labels = rta_get_mpls(enca[RTA_DST], rv->label);
731: }
732: #endif
733:
734:
735: len -= NLMSG_ALIGN(nh->rtnh_len);
736: nh = RTNH_NEXT(nh);
737: }
738:
739: /* Ensure nexthops are sorted to satisfy nest invariant */
740: if (!nexthop_is_sorted(first))
741: first = nexthop_sort(first);
742:
743: return first;
744: }
745:
746: static void
747: nl_add_metrics(struct nlmsghdr *h, uint bufsize, u32 *metrics, int max)
748: {
749: struct rtattr *a = nl_open_attr(h, bufsize, RTA_METRICS);
750: int t;
751:
752: for (t = 1; t < max; t++)
753: if (metrics[0] & (1 << t))
754: nl_add_attr_u32(h, bufsize, t, metrics[t]);
755:
756: nl_close_attr(h, a);
757: }
758:
759: static int
760: nl_parse_metrics(struct rtattr *hdr, u32 *metrics, int max)
761: {
762: struct rtattr *a = RTA_DATA(hdr);
763: int len = RTA_PAYLOAD(hdr);
764:
765: metrics[0] = 0;
766: for (; RTA_OK(a, len); a = RTA_NEXT(a, len))
767: {
768: if (a->rta_type == RTA_UNSPEC)
769: continue;
770:
771: if (a->rta_type >= max)
772: continue;
773:
774: if (RTA_PAYLOAD(a) != 4)
775: return -1;
776:
777: metrics[0] |= 1 << a->rta_type;
778: metrics[a->rta_type] = rta_get_u32(a);
779: }
780:
781: if (len > 0)
782: return -1;
783:
784: return 0;
785: }
786:
787:
788: /*
789: * Scanning of interfaces
790: */
791:
792: static void
793: nl_parse_link(struct nlmsghdr *h, int scan)
794: {
795: struct ifinfomsg *i;
796: struct rtattr *a[BIRD_IFLA_MAX];
797: int new = h->nlmsg_type == RTM_NEWLINK;
798: struct iface f = {};
799: struct iface *ifi;
800: char *name;
801: u32 mtu, master = 0;
802: uint fl;
803:
804: if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(IFLA_RTA(i), ifla_attr_want, a, sizeof(a)))
805: return;
806: if (!a[IFLA_IFNAME] || (RTA_PAYLOAD(a[IFLA_IFNAME]) < 2) || !a[IFLA_MTU])
807: {
808: /*
809: * IFLA_IFNAME and IFLA_MTU are required, in fact, but there may also come
810: * a message with IFLA_WIRELESS set, where (e.g.) no IFLA_IFNAME exists.
811: * We simply ignore all such messages with IFLA_WIRELESS without notice.
812: */
813:
814: if (a[IFLA_WIRELESS])
815: return;
816:
817: log(L_ERR "KIF: Malformed message received");
818: return;
819: }
820:
821: name = RTA_DATA(a[IFLA_IFNAME]);
822: mtu = rta_get_u32(a[IFLA_MTU]);
823:
824: if (a[IFLA_MASTER])
825: master = rta_get_u32(a[IFLA_MASTER]);
826:
827: ifi = if_find_by_index(i->ifi_index);
828: if (!new)
829: {
830: DBG("KIF: IF%d(%s) goes down\n", i->ifi_index, name);
831: if (!ifi)
832: return;
833:
834: if_delete(ifi);
835: }
836: else
837: {
838: DBG("KIF: IF%d(%s) goes up (mtu=%d,flg=%x)\n", i->ifi_index, name, mtu, i->ifi_flags);
839: if (ifi && strncmp(ifi->name, name, sizeof(ifi->name)-1))
840: if_delete(ifi);
841:
842: strncpy(f.name, name, sizeof(f.name)-1);
843: f.index = i->ifi_index;
844: f.mtu = mtu;
845:
846: f.master_index = master;
847: f.master = if_find_by_index(master);
848:
849: fl = i->ifi_flags;
850: if (fl & IFF_UP)
851: f.flags |= IF_ADMIN_UP;
852: if (fl & IFF_LOWER_UP)
853: f.flags |= IF_LINK_UP;
854: if (fl & IFF_LOOPBACK) /* Loopback */
855: f.flags |= IF_MULTIACCESS | IF_LOOPBACK | IF_IGNORE;
856: else if (fl & IFF_POINTOPOINT) /* PtP */
857: f.flags |= IF_MULTICAST;
858: else if (fl & IFF_BROADCAST) /* Broadcast */
859: f.flags |= IF_MULTIACCESS | IF_BROADCAST | IF_MULTICAST;
860: else
861: f.flags |= IF_MULTIACCESS; /* NBMA */
862:
863: if (fl & IFF_MULTICAST)
864: f.flags |= IF_MULTICAST;
865:
866: ifi = if_update(&f);
867:
868: if (!scan)
869: if_end_partial_update(ifi);
870: }
871: }
872:
873: static void
874: nl_parse_addr4(struct ifaddrmsg *i, int scan, int new)
875: {
876: struct rtattr *a[BIRD_IFA_MAX];
877: struct iface *ifi;
878: u32 ifa_flags;
879: int scope;
880:
881: if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want4, a, sizeof(a)))
882: return;
883:
884: if (!a[IFA_LOCAL])
885: {
886: log(L_ERR "KIF: Malformed message received (missing IFA_LOCAL)");
887: return;
888: }
889: if (!a[IFA_ADDRESS])
890: {
891: log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
892: return;
893: }
894:
895: ifi = if_find_by_index(i->ifa_index);
896: if (!ifi)
897: {
898: log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
899: return;
900: }
901:
902: if (a[IFA_FLAGS])
903: ifa_flags = rta_get_u32(a[IFA_FLAGS]);
904: else
905: ifa_flags = i->ifa_flags;
906:
907: struct ifa ifa;
908: bzero(&ifa, sizeof(ifa));
909: ifa.iface = ifi;
910: if (ifa_flags & IFA_F_SECONDARY)
911: ifa.flags |= IA_SECONDARY;
912:
913: ifa.ip = rta_get_ipa(a[IFA_LOCAL]);
914:
915: if (i->ifa_prefixlen > IP4_MAX_PREFIX_LENGTH)
916: {
917: log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
918: new = 0;
919: }
920: if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH)
921: {
922: ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
923: net_fill_ip4(&ifa.prefix, rta_get_ip4(a[IFA_ADDRESS]), i->ifa_prefixlen);
924:
925: /* It is either a host address or a peer address */
926: if (ipa_equal(ifa.ip, ifa.brd))
927: ifa.flags |= IA_HOST;
928: else
929: {
930: ifa.flags |= IA_PEER;
931: ifa.opposite = ifa.brd;
932: }
933: }
934: else
935: {
936: net_fill_ip4(&ifa.prefix, ipa_to_ip4(ifa.ip), i->ifa_prefixlen);
937: net_normalize(&ifa.prefix);
938:
939: if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 1)
940: ifa.opposite = ipa_opposite_m1(ifa.ip);
941:
942: if (i->ifa_prefixlen == IP4_MAX_PREFIX_LENGTH - 2)
943: ifa.opposite = ipa_opposite_m2(ifa.ip);
944:
945: if ((ifi->flags & IF_BROADCAST) && a[IFA_BROADCAST])
946: {
947: ip4_addr xbrd = rta_get_ip4(a[IFA_BROADCAST]);
948: ip4_addr ybrd = ip4_or(ipa_to_ip4(ifa.ip), ip4_not(ip4_mkmask(i->ifa_prefixlen)));
949:
950: if (ip4_equal(xbrd, net4_prefix(&ifa.prefix)) || ip4_equal(xbrd, ybrd))
951: ifa.brd = ipa_from_ip4(xbrd);
952: else if (ifi->flags & IF_TMP_DOWN) /* Complain only during the first scan */
953: {
954: log(L_ERR "KIF: Invalid broadcast address %I4 for %s", xbrd, ifi->name);
955: ifa.brd = ipa_from_ip4(ybrd);
956: }
957: }
958: }
959:
960: scope = ipa_classify(ifa.ip);
961: if (scope < 0)
962: {
963: log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
964: return;
965: }
966: ifa.scope = scope & IADDR_SCOPE_MASK;
967:
968: DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
969: ifi->index, ifi->name,
970: new ? "added" : "removed",
971: ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
972:
973: if (new)
974: ifa_update(&ifa);
975: else
976: ifa_delete(&ifa);
977:
978: if (!scan)
979: if_end_partial_update(ifi);
980: }
981:
982: static void
983: nl_parse_addr6(struct ifaddrmsg *i, int scan, int new)
984: {
985: struct rtattr *a[BIRD_IFA_MAX];
986: struct iface *ifi;
987: u32 ifa_flags;
988: int scope;
989:
990: if (!nl_parse_attrs(IFA_RTA(i), ifa_attr_want6, a, sizeof(a)))
991: return;
992:
993: if (!a[IFA_ADDRESS])
994: {
995: log(L_ERR "KIF: Malformed message received (missing IFA_ADDRESS)");
996: return;
997: }
998:
999: ifi = if_find_by_index(i->ifa_index);
1000: if (!ifi)
1001: {
1002: log(L_ERR "KIF: Received address message for unknown interface %d", i->ifa_index);
1003: return;
1004: }
1005:
1006: if (a[IFA_FLAGS])
1007: ifa_flags = rta_get_u32(a[IFA_FLAGS]);
1008: else
1009: ifa_flags = i->ifa_flags;
1010:
1011: struct ifa ifa;
1012: bzero(&ifa, sizeof(ifa));
1013: ifa.iface = ifi;
1014: if (ifa_flags & IFA_F_SECONDARY)
1015: ifa.flags |= IA_SECONDARY;
1016:
1017: /* Ignore tentative addresses silently */
1018: if (ifa_flags & IFA_F_TENTATIVE)
1019: return;
1020:
1021: /* IFA_LOCAL can be unset for IPv6 interfaces */
1022: ifa.ip = rta_get_ipa(a[IFA_LOCAL] ? : a[IFA_ADDRESS]);
1023:
1024: if (i->ifa_prefixlen > IP6_MAX_PREFIX_LENGTH)
1025: {
1026: log(L_ERR "KIF: Invalid prefix length for interface %s: %d", ifi->name, i->ifa_prefixlen);
1027: new = 0;
1028: }
1029: if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH)
1030: {
1031: ifa.brd = rta_get_ipa(a[IFA_ADDRESS]);
1032: net_fill_ip6(&ifa.prefix, rta_get_ip6(a[IFA_ADDRESS]), i->ifa_prefixlen);
1033:
1034: /* It is either a host address or a peer address */
1035: if (ipa_equal(ifa.ip, ifa.brd))
1036: ifa.flags |= IA_HOST;
1037: else
1038: {
1039: ifa.flags |= IA_PEER;
1040: ifa.opposite = ifa.brd;
1041: }
1042: }
1043: else
1044: {
1045: net_fill_ip6(&ifa.prefix, ipa_to_ip6(ifa.ip), i->ifa_prefixlen);
1046: net_normalize(&ifa.prefix);
1047:
1048: if (i->ifa_prefixlen == IP6_MAX_PREFIX_LENGTH - 1)
1049: ifa.opposite = ipa_opposite_m1(ifa.ip);
1050: }
1051:
1052: scope = ipa_classify(ifa.ip);
1053: if (scope < 0)
1054: {
1055: log(L_ERR "KIF: Invalid interface address %I for %s", ifa.ip, ifi->name);
1056: return;
1057: }
1058: ifa.scope = scope & IADDR_SCOPE_MASK;
1059:
1060: DBG("KIF: IF%d(%s): %s IPA %I, flg %x, net %N, brd %I, opp %I\n",
1061: ifi->index, ifi->name,
1062: new ? "added" : "removed",
1063: ifa.ip, ifa.flags, &ifa.prefix, ifa.brd, ifa.opposite);
1064:
1065: if (new)
1066: ifa_update(&ifa);
1067: else
1068: ifa_delete(&ifa);
1069:
1070: if (!scan)
1071: if_end_partial_update(ifi);
1072: }
1073:
/*
 * Process one RTM_NEWADDR / RTM_DELADDR message @h by dispatching it to the
 * IPv4 or IPv6 parser according to its address family. Messages that are
 * too short or of another family are dropped.
 */
static void
nl_parse_addr(struct nlmsghdr *h, int scan)
{
  struct ifaddrmsg *i = nl_checkin(h, sizeof(*i));
  if (!i)
    return;

  int new = (h->nlmsg_type == RTM_NEWADDR);

  if (i->ifa_family == AF_INET)
    nl_parse_addr4(i, scan, new);
  else if (i->ifa_family == AF_INET6)
    nl_parse_addr6(i, scan, new);
}
1093:
1094: void
1095: kif_do_scan(struct kif_proto *p UNUSED)
1096: {
1097: struct nlmsghdr *h;
1098:
1099: if_start_update();
1100:
1101: nl_request_dump(AF_UNSPEC, RTM_GETLINK);
1102: while (h = nl_get_scan())
1103: if (h->nlmsg_type == RTM_NEWLINK || h->nlmsg_type == RTM_DELLINK)
1104: nl_parse_link(h, 1);
1105: else
1106: log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1107:
1108: /* Re-resolve master interface for slaves */
1109: struct iface *i;
1110: WALK_LIST(i, iface_list)
1111: if (i->master_index)
1112: {
1113: struct iface f = {
1114: .flags = i->flags,
1115: .mtu = i->mtu,
1116: .index = i->index,
1117: .master_index = i->master_index,
1118: .master = if_find_by_index(i->master_index)
1119: };
1120:
1121: if (f.master != i->master)
1122: {
1123: memcpy(f.name, i->name, sizeof(f.name));
1124: if_update(&f);
1125: }
1126: }
1127:
1128: nl_request_dump(AF_INET, RTM_GETADDR);
1129: while (h = nl_get_scan())
1130: if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1131: nl_parse_addr(h, 1);
1132: else
1133: log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1134:
1135: nl_request_dump(AF_INET6, RTM_GETADDR);
1136: while (h = nl_get_scan())
1137: if (h->nlmsg_type == RTM_NEWADDR || h->nlmsg_type == RTM_DELADDR)
1138: nl_parse_addr(h, 1);
1139: else
1140: log(L_DEBUG "nl_scan_ifaces: Unknown packet received (type=%d)", h->nlmsg_type);
1141:
1142: if_end_update();
1143: }
1144:
1145: /*
1146: * Routes
1147: */
1148:
1149: static inline u32
1150: krt_table_id(struct krt_proto *p)
1151: {
1152: return KRT_CF->sys.table_id;
1153: }
1154:
1155: static HASH(struct krt_proto) nl_table_map;
1156:
1157: #define RTH_KEY(p) p->af, krt_table_id(p)
1158: #define RTH_NEXT(p) p->sys.hash_next
1159: #define RTH_EQ(a1,i1,a2,i2) a1 == a2 && i1 == i2
1160: #define RTH_FN(a,i) a ^ u32_hash(i)
1161:
1162: #define RTH_REHASH rth_rehash
1163: #define RTH_PARAMS /8, *2, 2, 2, 6, 20
1164:
1165: HASH_DEFINE_REHASH_FN(RTH, struct krt_proto)
1166:
1167: int
1168: krt_capable(rte *e)
1169: {
1170: rta *a = e->attrs;
1171:
1172: switch (a->dest)
1173: {
1174: case RTD_UNICAST:
1175: case RTD_BLACKHOLE:
1176: case RTD_UNREACHABLE:
1177: case RTD_PROHIBIT:
1178: return 1;
1179:
1180: default:
1181: return 0;
1182: }
1183: }
1184:
1185: static inline int
1186: nh_bufsize(struct nexthop *nh)
1187: {
1188: int rv = 0;
1189: for (; nh != NULL; nh = nh->next)
1190: rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
1191: return rv;
1192: }
1193:
1194: static int
1195: nl_send_route(struct krt_proto *p, rte *e, int op, int dest, struct nexthop *nh)
1196: {
1197: eattr *ea;
1198: net *net = e->net;
1199: rta *a = e->attrs;
1200: ea_list *eattrs = a->eattrs;
1201: int bufsize = 128 + KRT_METRICS_MAX*8 + nh_bufsize(&(a->nh));
1202: u32 priority = 0;
1203:
1204: struct {
1205: struct nlmsghdr h;
1206: struct rtmsg r;
1207: char buf[0];
1208: } *r;
1209:
1210: int rsize = sizeof(*r) + bufsize;
1211: r = alloca(rsize);
1212:
1213: DBG("nl_send_route(%N,op=%x)\n", net->n.addr, op);
1214:
1215: bzero(&r->h, sizeof(r->h));
1216: bzero(&r->r, sizeof(r->r));
1217: r->h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
1218: r->h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1219: r->h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
1220:
1221: r->r.rtm_family = p->af;
1222: r->r.rtm_dst_len = net_pxlen(net->n.addr);
1223: r->r.rtm_protocol = RTPROT_BIRD;
1224: r->r.rtm_scope = RT_SCOPE_NOWHERE;
1225: #ifdef HAVE_MPLS_KERNEL
1226: if (p->af == AF_MPLS)
1227: {
1228: /*
1229: * Kernel MPLS code is a bit picky. We must:
1230: * 1) Always set RT_SCOPE_UNIVERSE and RTN_UNICAST (even for RTM_DELROUTE)
1231: * 2) Never use RTA_PRIORITY
1232: */
1233:
1234: u32 label = net_mpls(net->n.addr);
1235: nl_add_attr_mpls(&r->h, rsize, RTA_DST, 1, &label);
1236: r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1237: r->r.rtm_type = RTN_UNICAST;
1238: }
1239: else
1240: #endif
1241: {
1242: nl_add_attr_ipa(&r->h, rsize, RTA_DST, net_prefix(net->n.addr));
1243:
1244: /* Add source address for IPv6 SADR routes */
1245: if (net->n.addr->type == NET_IP6_SADR)
1246: {
1247: net_addr_ip6_sadr *a = (void *) &net->n.addr;
1248: nl_add_attr_ip6(&r->h, rsize, RTA_SRC, a->src_prefix);
1249: r->r.rtm_src_len = a->src_pxlen;
1250: }
1251: }
1252:
1253: /*
1254: * Strange behavior for RTM_DELROUTE:
1255: * 1) rtm_family is ignored in IPv6, works for IPv4
1256: * 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
1257: * 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
1258: */
1259:
1260: if (krt_table_id(p) < 256)
1261: r->r.rtm_table = krt_table_id(p);
1262: else
1263: nl_add_attr_u32(&r->h, rsize, RTA_TABLE, krt_table_id(p));
1264:
1265: if (p->af == AF_MPLS)
1266: priority = 0;
1267: else if (a->source == RTS_DUMMY)
1268: priority = e->u.krt.metric;
1269: else if (KRT_CF->sys.metric)
1270: priority = KRT_CF->sys.metric;
1271: else if ((op != NL_OP_DELETE) && (ea = ea_find(eattrs, EA_KRT_METRIC)))
1272: priority = ea->u.data;
1273:
1274: if (priority)
1275: nl_add_attr_u32(&r->h, rsize, RTA_PRIORITY, priority);
1276:
1277: /* For route delete, we do not specify remaining route attributes */
1278: if (op == NL_OP_DELETE)
1279: goto dest;
1280:
1281: /* Default scope is LINK for device routes, UNIVERSE otherwise */
1282: if (p->af == AF_MPLS)
1283: r->r.rtm_scope = RT_SCOPE_UNIVERSE;
1284: else if (ea = ea_find(eattrs, EA_KRT_SCOPE))
1285: r->r.rtm_scope = ea->u.data;
1286: else
1287: r->r.rtm_scope = (dest == RTD_UNICAST && ipa_zero(nh->gw)) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
1288:
1289: if (ea = ea_find(eattrs, EA_KRT_PREFSRC))
1290: nl_add_attr_ipa(&r->h, rsize, RTA_PREFSRC, *(ip_addr *)ea->u.ptr->data);
1291:
1292: if (ea = ea_find(eattrs, EA_KRT_REALM))
1293: nl_add_attr_u32(&r->h, rsize, RTA_FLOW, ea->u.data);
1294:
1295:
1296: u32 metrics[KRT_METRICS_MAX];
1297: metrics[0] = 0;
1298:
1299: struct ea_walk_state ews = { .eattrs = eattrs };
1300: while (ea = ea_walk(&ews, EA_KRT_METRICS, KRT_METRICS_MAX))
1301: {
1302: int id = ea->id - EA_KRT_METRICS;
1303: metrics[0] |= 1 << id;
1304: metrics[id] = ea->u.data;
1305: }
1306:
1307: if (metrics[0])
1308: nl_add_metrics(&r->h, rsize, metrics, KRT_METRICS_MAX);
1309:
1310:
1311: dest:
1312: switch (dest)
1313: {
1314: case RTD_UNICAST:
1315: r->r.rtm_type = RTN_UNICAST;
1316: if (nh->next && !krt_ecmp6(p))
1317: nl_add_multipath(&r->h, rsize, nh, p->af);
1318: else
1319: {
1320: nl_add_attr_u32(&r->h, rsize, RTA_OIF, nh->iface->index);
1321: nl_add_nexthop(&r->h, rsize, nh, p->af);
1322:
1323: if (nh->flags & RNF_ONLINK)
1324: r->r.rtm_flags |= RTNH_F_ONLINK;
1325: }
1326: break;
1327: case RTD_BLACKHOLE:
1328: r->r.rtm_type = RTN_BLACKHOLE;
1329: break;
1330: case RTD_UNREACHABLE:
1331: r->r.rtm_type = RTN_UNREACHABLE;
1332: break;
1333: case RTD_PROHIBIT:
1334: r->r.rtm_type = RTN_PROHIBIT;
1335: break;
1336: case RTD_NONE:
1337: break;
1338: default:
1339: bug("krt_capable inconsistent with nl_send_route");
1340: }
1341:
1342: /* Ignore missing for DELETE */
1343: return nl_exchange(&r->h, (op == NL_OP_DELETE));
1344: }
1345:
1346: static inline int
1347: nl_add_rte(struct krt_proto *p, rte *e)
1348: {
1349: rta *a = e->attrs;
1350: int err = 0;
1351:
1352: if (krt_ecmp6(p) && a->nh.next)
1353: {
1354: struct nexthop *nh = &(a->nh);
1355:
1356: err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh);
1357: if (err < 0)
1358: return err;
1359:
1360: for (nh = nh->next; nh; nh = nh->next)
1361: err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh);
1362:
1363: return err;
1364: }
1365:
1366: return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh));
1367: }
1368:
1369: static inline int
1370: nl_delete_rte(struct krt_proto *p, rte *e)
1371: {
1372: int err = 0;
1373:
1374: /* For IPv6, we just repeatedly request DELETE until we get error */
1375: do
1376: err = nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL);
1377: while (krt_ecmp6(p) && !err);
1378:
1379: return err;
1380: }
1381:
1382: static inline int
1383: nl_replace_rte(struct krt_proto *p, rte *e)
1384: {
1385: rta *a = e->attrs;
1386: return nl_send_route(p, e, NL_OP_REPLACE, a->dest, &(a->nh));
1387: }
1388:
1389:
1390: void
1391: krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old)
1392: {
1393: int err = 0;
1394:
1395: /*
1396: * We use NL_OP_REPLACE for IPv4, it has an issue with not checking for
1397: * matching rtm_protocol, but that is OK when dedicated priority is used.
1398: *
1399: * We do not use NL_OP_REPLACE for IPv6, as it has broken semantics for ECMP
1400: * and with some kernel versions ECMP replace crashes kernel. Would need more
1401: * testing and checks for kernel versions.
1402: *
1403: * For IPv6, we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the
1404: * old route value, so we do not try to optimize IPv6 ECMP reconfigurations.
1405: */
1406:
1407: if (krt_ipv4(p) && old && new)
1408: {
1409: err = nl_replace_rte(p, new);
1410: }
1411: else
1412: {
1413: if (old)
1414: nl_delete_rte(p, old);
1415:
1416: if (new)
1417: err = nl_add_rte(p, new);
1418: }
1419:
1420: if (err < 0)
1421: n->n.flags |= KRF_SYNC_ERROR;
1422: else
1423: n->n.flags &= ~KRF_SYNC_ERROR;
1424: }
1425:
1426: static int
1427: nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family)
1428: {
1429: /* Route merging is used for IPv6 scans */
1430: if (!s->scan || (rtm_family != AF_INET6))
1431: return 0;
1432:
1433: /* Saved and new route must have same network, proto/table, and priority */
1434: if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
1435: return 0;
1436:
1437: /* Both must be regular unicast routes */
1438: if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
1439: return 0;
1440:
1441: return 1;
1442: }
1443:
1444: static void
1445: nl_announce_route(struct nl_parse_state *s)
1446: {
1447: rte *e = rte_get_temp(s->attrs);
1448: e->net = s->net;
1449: e->u.krt.src = s->krt_src;
1450: e->u.krt.proto = s->krt_proto;
1451: e->u.krt.seen = 0;
1452: e->u.krt.best = 0;
1453: e->u.krt.metric = s->krt_metric;
1454:
1455: if (s->scan)
1456: krt_got_route(s->proto, e);
1457: else
1458: krt_got_route_async(s->proto, e, s->new);
1459:
1460: s->net = NULL;
1461: s->attrs = NULL;
1462: s->proto = NULL;
1463: lp_flush(s->pool);
1464: }
1465:
1466: static inline void
1467: nl_parse_begin(struct nl_parse_state *s, int scan)
1468: {
1469: memset(s, 0, sizeof (struct nl_parse_state));
1470: s->pool = nl_linpool;
1471: s->scan = scan;
1472: }
1473:
1474: static inline void
1475: nl_parse_end(struct nl_parse_state *s)
1476: {
1477: if (s->net)
1478: nl_announce_route(s);
1479: }
1480:
1481:
1482: #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
1483:
1484: static void
1485: nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
1486: {
1487: struct krt_proto *p;
1488: struct rtmsg *i;
1489: struct rtattr *a[BIRD_RTA_MAX];
1490: int new = h->nlmsg_type == RTM_NEWROUTE;
1491:
1492: net_addr dst, src = {};
1493: u32 oif = ~0;
1494: u32 table_id;
1495: u32 priority = 0;
1496: u32 def_scope = RT_SCOPE_UNIVERSE;
1497: int krt_src;
1498:
1499: if (!(i = nl_checkin(h, sizeof(*i))))
1500: return;
1501:
1502: switch (i->rtm_family)
1503: {
1504: case AF_INET:
1505: if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want4, a, sizeof(a)))
1506: return;
1507:
1508: if (a[RTA_DST])
1509: net_fill_ip4(&dst, rta_get_ip4(a[RTA_DST]), i->rtm_dst_len);
1510: else
1511: net_fill_ip4(&dst, IP4_NONE, 0);
1512: break;
1513:
1514: case AF_INET6:
1515: if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want6, a, sizeof(a)))
1516: return;
1517:
1518: if (a[RTA_DST])
1519: net_fill_ip6(&dst, rta_get_ip6(a[RTA_DST]), i->rtm_dst_len);
1520: else
1521: net_fill_ip6(&dst, IP6_NONE, 0);
1522:
1523: if (a[RTA_SRC])
1524: net_fill_ip6(&src, rta_get_ip6(a[RTA_SRC]), i->rtm_src_len);
1525: else
1526: net_fill_ip6(&src, IP6_NONE, 0);
1527: break;
1528:
1529: #ifdef HAVE_MPLS_KERNEL
1530: case AF_MPLS:
1531: if (!nl_parse_attrs(RTM_RTA(i), rtm_attr_want_mpls, a, sizeof(a)))
1532: return;
1533:
1534: if (!a[RTA_DST])
1535: SKIP("MPLS route without RTA_DST");
1536:
1537: if (rta_get_mpls(a[RTA_DST], rta_mpls_stack) != 1)
1538: SKIP("MPLS route with multi-label RTA_DST");
1539:
1540: net_fill_mpls(&dst, rta_mpls_stack[0]);
1541: break;
1542: #endif
1543:
1544: default:
1545: return;
1546: }
1547:
1548: if (a[RTA_OIF])
1549: oif = rta_get_u32(a[RTA_OIF]);
1550:
1551: if (a[RTA_TABLE])
1552: table_id = rta_get_u32(a[RTA_TABLE]);
1553: else
1554: table_id = i->rtm_table;
1555:
1556: /* Do we know this table? */
1557: p = HASH_FIND(nl_table_map, RTH, i->rtm_family, table_id);
1558: if (!p)
1559: SKIP("unknown table %u\n", table_id);
1560:
1561: if (a[RTA_SRC] && (p->p.net_type != NET_IP6_SADR))
1562: SKIP("src prefix for non-SADR channel\n");
1563:
1564: if (a[RTA_IIF])
1565: SKIP("IIF set\n");
1566:
1567: if (i->rtm_tos != 0) /* We don't support TOS */
1568: SKIP("TOS %02x\n", i->rtm_tos);
1569:
1570: if (s->scan && !new)
1571: SKIP("RTM_DELROUTE in scan\n");
1572:
1573: if (a[RTA_PRIORITY])
1574: priority = rta_get_u32(a[RTA_PRIORITY]);
1575:
1576: int c = net_classify(&dst);
1577: if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
1578: SKIP("strange class/scope\n");
1579:
1580: switch (i->rtm_protocol)
1581: {
1582: case RTPROT_UNSPEC:
1583: SKIP("proto unspec\n");
1584:
1585: case RTPROT_REDIRECT:
1586: krt_src = KRT_SRC_REDIRECT;
1587: break;
1588:
1589: case RTPROT_KERNEL:
1590: krt_src = KRT_SRC_KERNEL;
1591: return;
1592:
1593: case RTPROT_BIRD:
1594: if (!s->scan)
1595: SKIP("echo\n");
1596: krt_src = KRT_SRC_BIRD;
1597: break;
1598:
1599: case RTPROT_BOOT:
1600: default:
1601: krt_src = KRT_SRC_ALIEN;
1602: }
1603:
1604: net_addr *n = &dst;
1605: if (p->p.net_type == NET_IP6_SADR)
1606: {
1607: n = alloca(sizeof(net_addr_ip6_sadr));
1608: net_fill_ip6_sadr(n, net6_prefix(&dst), net6_pxlen(&dst),
1609: net6_prefix(&src), net6_pxlen(&src));
1610: }
1611:
1612: net *net = net_get(p->p.main_channel->table, n);
1613:
1614: if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family))
1615: nl_announce_route(s);
1616:
1617: rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE);
1618: ra->src = p->p.main_source;
1619: ra->source = RTS_INHERIT;
1620: ra->scope = SCOPE_UNIVERSE;
1621:
1622: switch (i->rtm_type)
1623: {
1624: case RTN_UNICAST:
1625: ra->dest = RTD_UNICAST;
1626:
1627: if (a[RTA_MULTIPATH])
1628: {
1629: struct nexthop *nh = nl_parse_multipath(s, p, a[RTA_MULTIPATH], i->rtm_family);
1630: if (!nh)
1631: {
1632: log(L_ERR "KRT: Received strange multipath route %N", net->n.addr);
1633: return;
1634: }
1635:
1636: nexthop_link(ra, nh);
1637: break;
1638: }
1639:
1640: ra->nh.iface = if_find_by_index(oif);
1641: if (!ra->nh.iface)
1642: {
1643: log(L_ERR "KRT: Received route %N with unknown ifindex %u", net->n.addr, oif);
1644: return;
1645: }
1646:
1647: if ((i->rtm_family != AF_MPLS) && a[RTA_GATEWAY]
1648: #ifdef HAVE_MPLS_KERNEL
1649: || (i->rtm_family == AF_MPLS) && a[RTA_VIA]
1650: #endif
1651: )
1652: {
1653: #ifdef HAVE_MPLS_KERNEL
1654: if (i->rtm_family == AF_MPLS)
1655: ra->nh.gw = rta_get_via(a[RTA_VIA]);
1656: else
1657: #endif
1658: ra->nh.gw = rta_get_ipa(a[RTA_GATEWAY]);
1659:
1660: /* Silently skip strange 6to4 routes */
1661: const net_addr_ip6 sit = NET_ADDR_IP6(IP6_NONE, 96);
1662: if ((i->rtm_family == AF_INET6) && ipa_in_netX(ra->nh.gw, (net_addr *) &sit))
1663: return;
1664:
1665: if (i->rtm_flags & RTNH_F_ONLINK)
1666: ra->nh.flags |= RNF_ONLINK;
1667:
1668: neighbor *nbr;
1669: nbr = neigh_find(&p->p, ra->nh.gw, ra->nh.iface,
1670: (ra->nh.flags & RNF_ONLINK) ? NEF_ONLINK : 0);
1671: if (!nbr || (nbr->scope == SCOPE_HOST))
1672: {
1673: log(L_ERR "KRT: Received route %N with strange next-hop %I", net->n.addr,
1674: ra->nh.gw);
1675: return;
1676: }
1677: }
1678:
1679: break;
1680: case RTN_BLACKHOLE:
1681: ra->dest = RTD_BLACKHOLE;
1682: break;
1683: case RTN_UNREACHABLE:
1684: ra->dest = RTD_UNREACHABLE;
1685: break;
1686: case RTN_PROHIBIT:
1687: ra->dest = RTD_PROHIBIT;
1688: break;
1689: /* FIXME: What about RTN_THROW? */
1690: default:
1691: SKIP("type %d\n", i->rtm_type);
1692: return;
1693: }
1694:
1695: #ifdef HAVE_MPLS_KERNEL
1696: if ((i->rtm_family == AF_MPLS) && a[RTA_NEWDST] && !ra->nh.next)
1697: ra->nh.labels = rta_get_mpls(a[RTA_NEWDST], ra->nh.label);
1698:
1699: if (a[RTA_ENCAP] && a[RTA_ENCAP_TYPE] && !ra->nh.next)
1700: {
1701: switch (rta_get_u16(a[RTA_ENCAP_TYPE]))
1702: {
1703: case LWTUNNEL_ENCAP_MPLS:
1704: {
1705: struct rtattr *enca[BIRD_RTA_MAX];
1706: nl_attr_len = RTA_PAYLOAD(a[RTA_ENCAP]);
1707: nl_parse_attrs(RTA_DATA(a[RTA_ENCAP]), encap_mpls_want, enca, sizeof(enca));
1708: ra->nh.labels = rta_get_mpls(enca[RTA_DST], ra->nh.label);
1709: break;
1710: }
1711: default:
1712: SKIP("unknown encapsulation method %d\n", rta_get_u16(a[RTA_ENCAP_TYPE]));
1713: break;
1714: }
1715: }
1716: #endif
1717:
1718: if (i->rtm_scope != def_scope)
1719: {
1720: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1721: ea->next = ra->eattrs;
1722: ra->eattrs = ea;
1723: ea->flags = EALF_SORTED;
1724: ea->count = 1;
1725: ea->attrs[0].id = EA_KRT_SCOPE;
1726: ea->attrs[0].flags = 0;
1727: ea->attrs[0].type = EAF_TYPE_INT;
1728: ea->attrs[0].u.data = i->rtm_scope;
1729: }
1730:
1731: if (a[RTA_PREFSRC])
1732: {
1733: ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]);
1734:
1735: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1736: ea->next = ra->eattrs;
1737: ra->eattrs = ea;
1738: ea->flags = EALF_SORTED;
1739: ea->count = 1;
1740: ea->attrs[0].id = EA_KRT_PREFSRC;
1741: ea->attrs[0].flags = 0;
1742: ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
1743:
1744: struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
1745: ad->length = sizeof(ps);
1746: memcpy(ad->data, &ps, sizeof(ps));
1747:
1748: ea->attrs[0].u.ptr = ad;
1749: }
1750:
1751: if (a[RTA_FLOW])
1752: {
1753: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
1754: ea->next = ra->eattrs;
1755: ra->eattrs = ea;
1756: ea->flags = EALF_SORTED;
1757: ea->count = 1;
1758: ea->attrs[0].id = EA_KRT_REALM;
1759: ea->attrs[0].flags = 0;
1760: ea->attrs[0].type = EAF_TYPE_INT;
1761: ea->attrs[0].u.data = rta_get_u32(a[RTA_FLOW]);
1762: }
1763:
1764: if (a[RTA_METRICS])
1765: {
1766: u32 metrics[KRT_METRICS_MAX];
1767: ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
1768: int t, n = 0;
1769:
1770: if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
1771: {
1772: log(L_ERR "KRT: Received route %N with strange RTA_METRICS attribute", net->n.addr);
1773: return;
1774: }
1775:
1776: for (t = 1; t < KRT_METRICS_MAX; t++)
1777: if (metrics[0] & (1 << t))
1778: {
1779: ea->attrs[n].id = EA_CODE(PROTOCOL_KERNEL, KRT_METRICS_OFFSET + t);
1780: ea->attrs[n].flags = 0;
1781: ea->attrs[n].type = EAF_TYPE_INT; /* FIXME: Some are EAF_TYPE_BITFIELD */
1782: ea->attrs[n].u.data = metrics[t];
1783: n++;
1784: }
1785:
1786: if (n > 0)
1787: {
1788: ea->next = ra->eattrs;
1789: ea->flags = EALF_SORTED;
1790: ea->count = n;
1791: ra->eattrs = ea;
1792: }
1793: }
1794:
1795: /*
1796: * Ideally, now we would send the received route to the rest of kernel code.
1797: * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we
1798: * postpone it and merge next hops until the end of the sequence. Note that
1799: * when doing merging of next hops, we expect the new route to be unipath.
1800: * Otherwise, we ignore additional next hops in nexthop_insert().
1801: */
1802:
1803: if (!s->net)
1804: {
1805: /* Store the new route */
1806: s->net = net;
1807: s->attrs = ra;
1808: s->proto = p;
1809: s->new = new;
1810: s->krt_src = krt_src;
1811: s->krt_type = i->rtm_type;
1812: s->krt_proto = i->rtm_protocol;
1813: s->krt_metric = priority;
1814: }
1815: else
1816: {
1817: /* Merge next hops with the stored route */
1818: rta *oa = s->attrs;
1819:
1820: struct nexthop *nhs = &oa->nh;
1821: nexthop_insert(&nhs, &ra->nh);
1822:
1823: /* Perhaps new nexthop is inserted at the first position */
1824: if (nhs == &ra->nh)
1825: {
1826: /* Swap rtas */
1827: s->attrs = ra;
1828:
1829: /* Keep old eattrs */
1830: ra->eattrs = oa->eattrs;
1831: }
1832: }
1833: }
1834:
1835: void
1836: krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
1837: {
1838: struct nlmsghdr *h;
1839: struct nl_parse_state s;
1840:
1841: nl_parse_begin(&s, 1);
1842: nl_request_dump(AF_UNSPEC, RTM_GETROUTE);
1843: while (h = nl_get_scan())
1844: if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
1845: nl_parse_route(&s, h);
1846: else
1847: log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
1848: nl_parse_end(&s);
1849: }
1850:
1851: /*
1852: * Asynchronous Netlink interface
1853: */
1854:
1855: static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
1856: static byte *nl_async_rx_buffer; /* Receive buffer */
1857:
1858: static void
1859: nl_async_msg(struct nlmsghdr *h)
1860: {
1861: struct nl_parse_state s;
1862:
1863: switch (h->nlmsg_type)
1864: {
1865: case RTM_NEWROUTE:
1866: case RTM_DELROUTE:
1867: DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
1868: nl_parse_begin(&s, 0);
1869: nl_parse_route(&s, h);
1870: nl_parse_end(&s);
1871: break;
1872: case RTM_NEWLINK:
1873: case RTM_DELLINK:
1874: DBG("KRT: Received async link notification (%d)\n", h->nlmsg_type);
1875: if (kif_proto)
1876: nl_parse_link(h, 0);
1877: break;
1878: case RTM_NEWADDR:
1879: case RTM_DELADDR:
1880: DBG("KRT: Received async address notification (%d)\n", h->nlmsg_type);
1881: if (kif_proto)
1882: nl_parse_addr(h, 0);
1883: break;
1884: default:
1885: DBG("KRT: Received unknown async notification (%d)\n", h->nlmsg_type);
1886: }
1887: }
1888:
1889: static int
1890: nl_async_hook(sock *sk, uint size UNUSED)
1891: {
1892: struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
1893: struct sockaddr_nl sa;
1894: struct msghdr m = {
1895: .msg_name = &sa,
1896: .msg_namelen = sizeof(sa),
1897: .msg_iov = &iov,
1898: .msg_iovlen = 1,
1899: };
1900: struct nlmsghdr *h;
1901: int x;
1902: uint len;
1903:
1904: x = recvmsg(sk->fd, &m, 0);
1905: if (x < 0)
1906: {
1907: if (errno == ENOBUFS)
1908: {
1909: /*
1910: * Netlink reports some packets have been thrown away.
1911: * One day we might react to it by asking for route table
1912: * scan in near future.
1913: */
1914: log(L_WARN "Kernel dropped some netlink messages, will resync on next scan.");
1915: return 1; /* More data are likely to be ready */
1916: }
1917: else if (errno != EWOULDBLOCK)
1918: log(L_ERR "Netlink recvmsg: %m");
1919: return 0;
1920: }
1921: if (sa.nl_pid) /* It isn't from the kernel */
1922: {
1923: DBG("Non-kernel packet\n");
1924: return 1;
1925: }
1926: h = (void *) nl_async_rx_buffer;
1927: len = x;
1928: if (m.msg_flags & MSG_TRUNC)
1929: {
1930: log(L_WARN "Netlink got truncated asynchronous message");
1931: return 1;
1932: }
1933: while (NLMSG_OK(h, len))
1934: {
1935: nl_async_msg(h);
1936: h = NLMSG_NEXT(h, len);
1937: }
1938: if (len)
1939: log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
1940: return 1;
1941: }
1942:
1943: static void
1944: nl_async_err_hook(sock *sk, int e UNUSED)
1945: {
1946: nl_async_hook(sk, 0);
1947: }
1948:
1949: static void
1950: nl_open_async(void)
1951: {
1952: sock *sk;
1953: struct sockaddr_nl sa;
1954: int fd;
1955:
1956: if (nl_async_sk)
1957: return;
1958:
1959: DBG("KRT: Opening async netlink socket\n");
1960:
1961: fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1962: if (fd < 0)
1963: {
1964: log(L_ERR "Unable to open asynchronous rtnetlink socket: %m");
1965: return;
1966: }
1967:
1968: bzero(&sa, sizeof(sa));
1969: sa.nl_family = AF_NETLINK;
1970: sa.nl_groups = RTMGRP_LINK |
1971: RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE |
1972: RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_ROUTE;
1973:
1974: if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
1975: {
1976: log(L_ERR "Unable to bind asynchronous rtnetlink socket: %m");
1977: close(fd);
1978: return;
1979: }
1980:
1981: nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
1982:
1983: sk = nl_async_sk = sk_new(krt_pool);
1984: sk->type = SK_MAGIC;
1985: sk->rx_hook = nl_async_hook;
1986: sk->err_hook = nl_async_err_hook;
1987: sk->fd = fd;
1988: if (sk_open(sk) < 0)
1989: bug("Netlink: sk_open failed");
1990: }
1991:
1992:
1993: /*
1994: * Interface to the UNIX krt module
1995: */
1996:
1997: void
1998: krt_sys_io_init(void)
1999: {
2000: nl_linpool = lp_new_default(krt_pool);
2001: HASH_INIT(nl_table_map, krt_pool, 6);
2002: }
2003:
2004: int
2005: krt_sys_start(struct krt_proto *p)
2006: {
2007: struct krt_proto *old = HASH_FIND(nl_table_map, RTH, p->af, krt_table_id(p));
2008:
2009: if (old)
2010: {
2011: log(L_ERR "%s: Kernel table %u already registered by %s",
2012: p->p.name, krt_table_id(p), old->p.name);
2013: return 0;
2014: }
2015:
2016: HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
2017:
2018: nl_open();
2019: nl_open_async();
2020:
2021: return 1;
2022: }
2023:
2024: void
2025: krt_sys_shutdown(struct krt_proto *p)
2026: {
2027: HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
2028: }
2029:
2030: int
2031: krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
2032: {
2033: return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
2034: }
2035:
2036: void
2037: krt_sys_init_config(struct krt_config *cf)
2038: {
2039: cf->sys.table_id = RT_TABLE_MAIN;
2040: cf->sys.metric = 32;
2041: }
2042:
2043: void
2044: krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
2045: {
2046: d->sys.table_id = s->sys.table_id;
2047: d->sys.metric = s->sys.metric;
2048: }
2049:
2050: static const char *krt_metrics_names[KRT_METRICS_MAX] = {
2051: NULL, "lock", "mtu", "window", "rtt", "rttvar", "sstresh", "cwnd", "advmss",
2052: "reordering", "hoplimit", "initcwnd", "features", "rto_min", "initrwnd", "quickack"
2053: };
2054:
2055: static const char *krt_features_names[KRT_FEATURES_MAX] = {
2056: "ecn", NULL, NULL, "allfrag"
2057: };
2058:
2059: int
2060: krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
2061: {
2062: switch (a->id)
2063: {
2064: case EA_KRT_PREFSRC:
2065: bsprintf(buf, "prefsrc");
2066: return GA_NAME;
2067:
2068: case EA_KRT_REALM:
2069: bsprintf(buf, "realm");
2070: return GA_NAME;
2071:
2072: case EA_KRT_SCOPE:
2073: bsprintf(buf, "scope");
2074: return GA_NAME;
2075:
2076: case EA_KRT_LOCK:
2077: buf += bsprintf(buf, "lock:");
2078: ea_format_bitfield(a, buf, buflen, krt_metrics_names, 2, KRT_METRICS_MAX);
2079: return GA_FULL;
2080:
2081: case EA_KRT_FEATURES:
2082: buf += bsprintf(buf, "features:");
2083: ea_format_bitfield(a, buf, buflen, krt_features_names, 0, KRT_FEATURES_MAX);
2084: return GA_FULL;
2085:
2086: default:;
2087: int id = (int)EA_ID(a->id) - KRT_METRICS_OFFSET;
2088: if (id > 0 && id < KRT_METRICS_MAX)
2089: {
2090: bsprintf(buf, "%s", krt_metrics_names[id]);
2091: return GA_NAME;
2092: }
2093:
2094: return GA_UNKNOWN;
2095: }
2096: }
2097:
2098:
2099:
2100: void
2101: kif_sys_start(struct kif_proto *p UNUSED)
2102: {
2103: nl_open();
2104: nl_open_async();
2105: }
2106:
2107: void
2108: kif_sys_shutdown(struct kif_proto *p UNUSED)
2109: {
2110: }
2111:
2112: int
2113: kif_update_sysdep_addr(struct iface *i UNUSED)
2114: {
2115: return 0;
2116: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>