Annotation of embedaddon/bird2/proto/bgp/attrs.c, revision 1.1.1.1
1.1 misho 1: /*
2: * BIRD -- BGP Attributes
3: *
4: * (c) 2000 Martin Mares <mj@ucw.cz>
5: * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6: * (c) 2008--2016 CZ.NIC z.s.p.o.
7: *
8: * Can be freely distributed and used under the terms of the GNU GPL.
9: */
10:
11: #undef LOCAL_DEBUG
12:
13: #include <stdlib.h>
14:
15: #include "nest/bird.h"
16: #include "nest/iface.h"
17: #include "nest/protocol.h"
18: #include "nest/route.h"
19: #include "nest/attrs.h"
20: #include "conf/conf.h"
21: #include "lib/resource.h"
22: #include "lib/string.h"
23: #include "lib/unaligned.h"
24:
25: #include "bgp.h"
26:
27: /*
28: * UPDATE message error handling
29: *
30: * All checks from RFC 4271 6.3 are done as specified with these exceptions:
31: * - The semantic check of an IP address from NEXT_HOP attribute is missing.
32: * - Checks of some optional attribute values are missing.
33: * - Syntactic and semantic checks of NLRIs (done in DECODE_PREFIX())
34: * are probably inadequate.
35: *
36: * Loop detection based on AS_PATH causes updates to be withdrawn. RFC
37: * 4271 does not explicitly specify the behavior in that case.
38: *
39: * Loop detection related to route reflection (based on ORIGINATOR_ID
40: * and CLUSTER_LIST) causes updates to be withdrawn. RFC 4456 8
41: * specifies that such updates should be ignored, but that is generally
42: * a bad idea.
43: *
44: * BGP attribute table has several hooks:
45: *
46: * export - Hook that validates and normalizes attribute during export phase.
47: * Receives eattr, may modify it (e.g., sort community lists for canonical
48: * representation), UNSET() it (e.g., skip empty lists), or WITHDRAW() it if
49: * necessary. May assume that eattr has value valid w.r.t. its type, but may be
50: * invalid w.r.t. BGP constraints. Optional.
51: *
52: * encode - Hook that converts internal representation to external one during
53: * packet writing. Receives eattr and puts it in the buffer (including attribute
54: * header). Returns number of bytes, or -1 if not enough space. May assume that
55: * eattr has value valid w.r.t. its type and validated by export hook. Mandatory
56: * for all known attributes that exist internally after export phase (i.e., all
57: * except pseudoattributes MP_(UN)REACH_NLRI).
58: *
59: * decode - Hook that converts external representation to internal one during
60: * packet parsing. Receives attribute data in buffer, validates it and adds
61: * attribute to ea_list. If data are invalid, steps DISCARD(), WITHDRAW() or
62: * bgp_parse_error() may be used to escape. Mandatory for all known attributes.
63: *
64: * format - Optional hook that converts eattr to textual representation.
65: */
66:
67:
68: struct bgp_attr_desc {
69: const char *name;
70: uint type;
71: uint flags;
72: void (*export)(struct bgp_export_state *s, eattr *a);
73: int (*encode)(struct bgp_write_state *s, eattr *a, byte *buf, uint size);
74: void (*decode)(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to);
75: void (*format)(eattr *ea, byte *buf, uint size);
76: };
77:
78: static const struct bgp_attr_desc bgp_attr_table[];
79:
80: static inline int bgp_attr_known(uint code);
81:
82: eattr *
83: bgp_set_attr(ea_list **attrs, struct linpool *pool, uint code, uint flags, uintptr_t val)
84: {
85: ASSERT(bgp_attr_known(code));
86:
87: return ea_set_attr(
88: attrs,
89: pool,
90: EA_CODE(PROTOCOL_BGP, code),
91: flags,
92: bgp_attr_table[code].type,
93: val
94: );
95: }
96:
97:
98:
/*
 * Escape helpers for attribute hooks. They rely on a variable named `s' in
 * the enclosing scope, and all of them except REPORT() return from the
 * enclosing (void) function.
 */

/* Log a message prefixed with the protocol name */
#define REPORT(msg, ...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ##__VA_ARGS__); })

/* Log the message and leave the hook */
#define DISCARD(msg, ...) \
  ({ REPORT(msg, ##__VA_ARGS__); return; })

/* Log the message, set s->err_withdraw and leave the hook */
#define WITHDRAW(msg, ...) \
  ({ REPORT(msg, ##__VA_ARGS__); s->err_withdraw = 1; return; })

/* Turn the attribute into an undefined one and leave the hook */
#define UNSET(a) \
  ({ a->type = EAF_TYPE_UNDEF; return; })

/* Message templates shared by the hooks */
#define NEW_BGP		"Discarding %s attribute received from AS4-aware neighbor"
#define BAD_EBGP	"Discarding %s attribute received from EBGP neighbor"
#define BAD_LENGTH	"Malformed %s attribute - invalid length (%u)"
#define BAD_VALUE	"Malformed %s attribute - invalid value (%u)"
#define NO_MANDATORY	"Missing mandatory %s attribute"
116:
117:
118: static inline int
119: bgp_put_attr_hdr3(byte *buf, uint code, uint flags, uint len)
120: {
121: *buf++ = flags;
122: *buf++ = code;
123: *buf++ = len;
124: return 3;
125: }
126:
127: static inline int
128: bgp_put_attr_hdr4(byte *buf, uint code, uint flags, uint len)
129: {
130: *buf++ = flags | BAF_EXT_LEN;
131: *buf++ = code;
132: put_u16(buf, len);
133: return 4;
134: }
135:
136: static inline int
137: bgp_put_attr_hdr(byte *buf, uint code, uint flags, uint len)
138: {
139: if (len < 256)
140: return bgp_put_attr_hdr3(buf, code, flags, len);
141: else
142: return bgp_put_attr_hdr4(buf, code, flags, len);
143: }
144:
145: static int
146: bgp_encode_u8(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
147: {
148: if (size < (3+1))
149: return -1;
150:
151: bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 1);
152: buf[3] = a->u.data;
153:
154: return 3+1;
155: }
156:
157: static int
158: bgp_encode_u32(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
159: {
160: if (size < (3+4))
161: return -1;
162:
163: bgp_put_attr_hdr3(buf, EA_ID(a->id), a->flags, 4);
164: put_u32(buf+3, a->u.data);
165:
166: return 3+4;
167: }
168:
169: static int
170: bgp_encode_u32s(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
171: {
172: uint len = a->u.ptr->length;
173:
174: if (size < (4+len))
175: return -1;
176:
177: uint hdr = bgp_put_attr_hdr(buf, EA_ID(a->id), a->flags, len);
178: put_u32s(buf + hdr, (u32 *) a->u.ptr->data, len / 4);
179:
180: return hdr + len;
181: }
182:
183: static int
184: bgp_put_attr(byte *buf, uint size, uint code, uint flags, const byte *data, uint len)
185: {
186: if (size < (4+len))
187: return -1;
188:
189: uint hdr = bgp_put_attr_hdr(buf, code, flags, len);
190: memcpy(buf + hdr, data, len);
191:
192: return hdr + len;
193: }
194:
195: static int
196: bgp_encode_raw(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size)
197: {
198: return bgp_put_attr(buf, size, EA_ID(a->id), a->flags, a->u.ptr->data, a->u.ptr->length);
199: }
200:
201:
202: /*
203: * AIGP handling
204: */
205:
206: static int
207: bgp_aigp_valid(byte *data, uint len, char *err, uint elen)
208: {
209: byte *pos = data;
210: char *err_dsc = NULL;
211: uint err_val = 0;
212:
213: #define BAD(DSC,VAL) ({ err_dsc = DSC; err_val = VAL; goto bad; })
214: while (len)
215: {
216: if (len < 3)
217: BAD("TLV framing error", len);
218:
219: /* Process one TLV */
220: uint ptype = pos[0];
221: uint plen = get_u16(pos + 1);
222:
223: if (len < plen)
224: BAD("TLV framing error", plen);
225:
226: if (plen < 3)
227: BAD("Bad TLV length", plen);
228:
229: if ((ptype == BGP_AIGP_METRIC) && (plen != 11))
230: BAD("Bad AIGP TLV length", plen);
231:
232: ADVANCE(pos, len, plen);
233: }
234: #undef BAD
235:
236: return 1;
237:
238: bad:
239: if (err)
240: if (bsnprintf(err, elen, "%s (%u) at %d", err_dsc, err_val, (int) (pos - data)) < 0)
241: err[0] = 0;
242:
243: return 0;
244: }
245:
246: static const byte *
247: bgp_aigp_get_tlv(const struct adata *ad, uint type)
248: {
249: if (!ad)
250: return NULL;
251:
252: uint len = ad->length;
253: const byte *pos = ad->data;
254:
255: while (len)
256: {
257: uint ptype = pos[0];
258: uint plen = get_u16(pos + 1);
259:
260: if (ptype == type)
261: return pos;
262:
263: ADVANCE(pos, len, plen);
264: }
265:
266: return NULL;
267: }
268:
269: static const struct adata *
270: bgp_aigp_set_tlv(struct linpool *pool, const struct adata *ad, uint type, byte *data, uint dlen)
271: {
272: uint len = ad ? ad->length : 0;
273: const byte *pos = ad ? ad->data : NULL;
274: struct adata *res = lp_alloc_adata(pool, len + 3 + dlen);
275: byte *dst = res->data;
276: byte *tlv = NULL;
277: int del = 0;
278:
279: while (len)
280: {
281: uint ptype = pos[0];
282: uint plen = get_u16(pos + 1);
283:
284: /* Find position for new TLV */
285: if ((ptype >= type) && !tlv)
286: {
287: tlv = dst;
288: dst += 3 + dlen;
289: }
290:
291: /* Skip first matching TLV, copy others */
292: if ((ptype == type) && !del)
293: del = 1;
294: else
295: {
296: memcpy(dst, pos, plen);
297: dst += plen;
298: }
299:
300: ADVANCE(pos, len, plen);
301: }
302:
303: if (!tlv)
304: {
305: tlv = dst;
306: dst += 3 + dlen;
307: }
308:
309: /* Store the TLD */
310: put_u8(tlv + 0, type);
311: put_u16(tlv + 1, 3 + dlen);
312: memcpy(tlv + 3, data, dlen);
313:
314: /* Update length */
315: res->length = dst - res->data;
316:
317: return res;
318: }
319:
320: static u64 UNUSED
321: bgp_aigp_get_metric(const struct adata *ad, u64 def)
322: {
323: const byte *b = bgp_aigp_get_tlv(ad, BGP_AIGP_METRIC);
324: return b ? get_u64(b + 3) : def;
325: }
326:
327: static const struct adata *
328: bgp_aigp_set_metric(struct linpool *pool, const struct adata *ad, u64 metric)
329: {
330: byte data[8];
331: put_u64(data, metric);
332: return bgp_aigp_set_tlv(pool, ad, BGP_AIGP_METRIC, data, 8);
333: }
334:
335: int
336: bgp_total_aigp_metric_(rte *e, u64 *metric, const struct adata **ad)
337: {
338: eattr *a = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AIGP));
339: if (!a)
340: return 0;
341:
342: const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
343: if (!b)
344: return 0;
345:
346: u64 aigp = get_u64(b + 3);
347: u64 step = e->attrs->igp_metric;
348:
349: if (!rte_resolvable(e) || (step >= IGP_METRIC_UNKNOWN))
350: step = BGP_AIGP_MAX;
351:
352: if (!step)
353: step = 1;
354:
355: *ad = a->u.ptr;
356: *metric = aigp + step;
357: if (*metric < aigp)
358: *metric = BGP_AIGP_MAX;
359:
360: return 1;
361: }
362:
363: static inline int
364: bgp_init_aigp_metric(rte *e, u64 *metric, const struct adata **ad)
365: {
366: if (e->attrs->source == RTS_BGP)
367: return 0;
368:
369: *metric = rt_get_igp_metric(e);
370: *ad = NULL;
371: return *metric < IGP_METRIC_UNKNOWN;
372: }
373:
374:
375: /*
376: * Attribute hooks
377: */
378:
379: static void
380: bgp_export_origin(struct bgp_export_state *s, eattr *a)
381: {
382: if (a->u.data > 2)
383: WITHDRAW(BAD_VALUE, "ORIGIN", a->u.data);
384: }
385:
386: static void
387: bgp_decode_origin(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
388: {
389: if (len != 1)
390: WITHDRAW(BAD_LENGTH, "ORIGIN", len);
391:
392: if (data[0] > 2)
393: WITHDRAW(BAD_VALUE, "ORIGIN", data[0]);
394:
395: bgp_set_attr_u32(to, s->pool, BA_ORIGIN, flags, data[0]);
396: }
397:
398: static void
399: bgp_format_origin(eattr *a, byte *buf, uint size UNUSED)
400: {
401: static const char *bgp_origin_names[] = { "IGP", "EGP", "Incomplete" };
402:
403: bsprintf(buf, (a->u.data <= 2) ? bgp_origin_names[a->u.data] : "?");
404: }
405:
406:
407: static int
408: bgp_encode_as_path(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
409: {
410: const byte *data = a->u.ptr->data;
411: uint len = a->u.ptr->length;
412:
413: if (!s->as4_session)
414: {
415: /* Prepare 16-bit AS_PATH (from 32-bit one) in a temporary buffer */
416: byte *dst = alloca(len);
417: len = as_path_32to16(dst, data, len);
418: data = dst;
419: }
420:
421: return bgp_put_attr(buf, size, BA_AS_PATH, a->flags, data, len);
422: }
423:
424: static void
425: bgp_decode_as_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
426: {
427: struct bgp_proto *p = s->proto;
428: int as_length = s->as4_session ? 4 : 2;
429: int as_confed = p->cf->confederation && p->is_interior;
430: char err[128];
431:
432: if (!as_path_valid(data, len, as_length, as_confed, err, sizeof(err)))
433: WITHDRAW("Malformed AS_PATH attribute - %s", err);
434:
435: /* In some circumstances check for initial AS_CONFED_SEQUENCE; RFC 5065 5.0 */
436: if (p->is_interior && !p->is_internal &&
437: ((len < 2) || (data[0] != AS_PATH_CONFED_SEQUENCE)))
438: WITHDRAW("Malformed AS_PATH attribute - %s", "missing initial AS_CONFED_SEQUENCE");
439:
440: if (!s->as4_session)
441: {
442: /* Prepare 32-bit AS_PATH (from 16-bit one) in a temporary buffer */
443: byte *src = data;
444: data = alloca(2*len);
445: len = as_path_16to32(data, src, len);
446: }
447:
448: bgp_set_attr_data(to, s->pool, BA_AS_PATH, flags, data, len);
449: }
450:
451:
452: static int
453: bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
454: {
455: /*
456: * The NEXT_HOP attribute is used only in traditional (IPv4) BGP. In MP-BGP,
457: * the next hop is encoded as a part of the MP_REACH_NLRI attribute, so we
458: * store it and encode it later by AFI-specific hooks.
459: */
460:
461: if (!s->mp_reach)
462: {
463: // ASSERT(a->u.ptr->length == sizeof(ip_addr));
464:
465: /* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
466: ip_addr *addr = (void *) a->u.ptr->data;
467: if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
468: return 0;
469:
470: if (size < (3+4))
471: return -1;
472:
473: bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
474: put_ip4(buf+3, ipa_to_ip4(*addr));
475:
476: return 3+4;
477: }
478: else
479: {
480: s->mp_next_hop = a;
481: return 0;
482: }
483: }
484:
485: static void
486: bgp_decode_next_hop(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
487: {
488: if (len != 4)
489: WITHDRAW(BAD_LENGTH, "NEXT_HOP", len);
490:
491: /* Semantic checks are done later */
492: s->ip_next_hop_len = len;
493: s->ip_next_hop_data = data;
494: }
495:
496: /* TODO: This function should use AF-specific hook */
497: static void
498: bgp_format_next_hop(eattr *a, byte *buf, uint size UNUSED)
499: {
500: ip_addr *nh = (void *) a->u.ptr->data;
501: uint len = a->u.ptr->length;
502:
503: ASSERT((len == 16) || (len == 32));
504:
505: /* in IPv6, we may have two addresses in NEXT HOP */
506: if ((len == 16) || ipa_zero(nh[1]))
507: bsprintf(buf, "%I", nh[0]);
508: else
509: bsprintf(buf, "%I %I", nh[0], nh[1]);
510: }
511:
512:
513: static void
514: bgp_decode_med(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
515: {
516: if (len != 4)
517: WITHDRAW(BAD_LENGTH, "MULTI_EXIT_DISC", len);
518:
519: u32 val = get_u32(data);
520: bgp_set_attr_u32(to, s->pool, BA_MULTI_EXIT_DISC, flags, val);
521: }
522:
523:
524: static void
525: bgp_export_local_pref(struct bgp_export_state *s, eattr *a)
526: {
527: if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
528: UNSET(a);
529: }
530:
531: static void
532: bgp_decode_local_pref(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
533: {
534: if (!s->proto->is_interior && !s->proto->cf->allow_local_pref)
535: DISCARD(BAD_EBGP, "LOCAL_PREF");
536:
537: if (len != 4)
538: WITHDRAW(BAD_LENGTH, "LOCAL_PREF", len);
539:
540: u32 val = get_u32(data);
541: bgp_set_attr_u32(to, s->pool, BA_LOCAL_PREF, flags, val);
542: }
543:
544:
545: static void
546: bgp_decode_atomic_aggr(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data UNUSED, uint len, ea_list **to)
547: {
548: if (len != 0)
549: DISCARD(BAD_LENGTH, "ATOMIC_AGGR", len);
550:
551: bgp_set_attr_data(to, s->pool, BA_ATOMIC_AGGR, flags, NULL, 0);
552: }
553:
554: static int
555: bgp_encode_aggregator(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
556: {
557: const byte *data = a->u.ptr->data;
558: uint len = a->u.ptr->length;
559:
560: if (!s->as4_session)
561: {
562: /* Prepare 16-bit AGGREGATOR (from 32-bit one) in a temporary buffer */
563: byte *dst = alloca(6);
564: len = aggregator_32to16(dst, data);
565: }
566:
567: return bgp_put_attr(buf, size, BA_AGGREGATOR, a->flags, data, len);
568: }
569:
570: static void
571: bgp_decode_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
572: {
573: if (len != (s->as4_session ? 8 : 6))
574: DISCARD(BAD_LENGTH, "AGGREGATOR", len);
575:
576: if (!s->as4_session)
577: {
578: /* Prepare 32-bit AGGREGATOR (from 16-bit one) in a temporary buffer */
579: byte *src = data;
580: data = alloca(8);
581: len = aggregator_16to32(data, src);
582: }
583:
584: bgp_set_attr_data(to, s->pool, BA_AGGREGATOR, flags, data, len);
585: }
586:
587: static void
588: bgp_format_aggregator(eattr *a, byte *buf, uint size UNUSED)
589: {
590: const byte *data = a->u.ptr->data;
591:
592: bsprintf(buf, "%I4 AS%u", get_ip4(data+4), get_u32(data+0));
593: }
594:
595:
596: static void
597: bgp_export_community(struct bgp_export_state *s, eattr *a)
598: {
599: if (a->u.ptr->length == 0)
600: UNSET(a);
601:
602: a->u.ptr = int_set_sort(s->pool, a->u.ptr);
603: }
604:
605: static void
606: bgp_decode_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
607: {
608: if (!len || (len % 4))
609: WITHDRAW(BAD_LENGTH, "COMMUNITY", len);
610:
611: struct adata *ad = lp_alloc_adata(s->pool, len);
612: get_u32s(data, (u32 *) ad->data, len / 4);
613: bgp_set_attr_ptr(to, s->pool, BA_COMMUNITY, flags, ad);
614: }
615:
616:
617: static void
618: bgp_export_originator_id(struct bgp_export_state *s, eattr *a)
619: {
620: if (!s->proto->is_internal)
621: UNSET(a);
622: }
623:
624: static void
625: bgp_decode_originator_id(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
626: {
627: if (!s->proto->is_internal)
628: DISCARD(BAD_EBGP, "ORIGINATOR_ID");
629:
630: if (len != 4)
631: WITHDRAW(BAD_LENGTH, "ORIGINATOR_ID", len);
632:
633: u32 val = get_u32(data);
634: bgp_set_attr_u32(to, s->pool, BA_ORIGINATOR_ID, flags, val);
635: }
636:
637:
638: static void
639: bgp_export_cluster_list(struct bgp_export_state *s UNUSED, eattr *a)
640: {
641: if (!s->proto->is_internal)
642: UNSET(a);
643:
644: if (a->u.ptr->length == 0)
645: UNSET(a);
646: }
647:
648: static void
649: bgp_decode_cluster_list(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
650: {
651: if (!s->proto->is_internal)
652: DISCARD(BAD_EBGP, "CLUSTER_LIST");
653:
654: if (!len || (len % 4))
655: WITHDRAW(BAD_LENGTH, "CLUSTER_LIST", len);
656:
657: struct adata *ad = lp_alloc_adata(s->pool, len);
658: get_u32s(data, (u32 *) ad->data, len / 4);
659: bgp_set_attr_ptr(to, s->pool, BA_CLUSTER_LIST, flags, ad);
660: }
661:
662: static void
663: bgp_format_cluster_list(eattr *a, byte *buf, uint size)
664: {
665: /* Truncates cluster lists larger than buflen, probably not a problem */
666: int_set_format(a->u.ptr, 0, -1, buf, size);
667: }
668:
669:
670: static inline u32
671: get_af3(byte *buf)
672: {
673: return (get_u16(buf) << 16) | buf[2];
674: }
675:
676: static void
677: bgp_decode_mp_reach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
678: {
679: /*
680: * 2 B MP_REACH_NLRI data - Address Family Identifier
681: * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
682: * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
683: * var MP_REACH_NLRI data - Network Address of Next Hop
684: * 1 B MP_REACH_NLRI data - Reserved (zero)
685: * var MP_REACH_NLRI data - Network Layer Reachability Information
686: */
687:
688: if ((len < 5) || (len < (5 + (uint) data[3])))
689: bgp_parse_error(s, 9);
690:
691: s->mp_reach_af = get_af3(data);
692: s->mp_next_hop_len = data[3];
693: s->mp_next_hop_data = data + 4;
694: s->mp_reach_len = len - 5 - s->mp_next_hop_len;
695: s->mp_reach_nlri = data + 5 + s->mp_next_hop_len;
696: }
697:
698:
699: static void
700: bgp_decode_mp_unreach_nlri(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data, uint len, ea_list **to UNUSED)
701: {
702: /*
703: * 2 B MP_UNREACH_NLRI data - Address Family Identifier
704: * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
705: * var MP_UNREACH_NLRI data - Network Layer Reachability Information
706: */
707:
708: if (len < 3)
709: bgp_parse_error(s, 9);
710:
711: s->mp_unreach_af = get_af3(data);
712: s->mp_unreach_len = len - 3;
713: s->mp_unreach_nlri = data + 3;
714: }
715:
716:
717: static void
718: bgp_export_ext_community(struct bgp_export_state *s, eattr *a)
719: {
720: struct adata *ad = ec_set_del_nontrans(s->pool, a->u.ptr);
721:
722: if (ad->length == 0)
723: UNSET(a);
724:
725: ec_set_sort_x(ad);
726: a->u.ptr = ad;
727: }
728:
729: static void
730: bgp_decode_ext_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
731: {
732: if (!len || (len % 8))
733: WITHDRAW(BAD_LENGTH, "EXT_COMMUNITY", len);
734:
735: struct adata *ad = lp_alloc_adata(s->pool, len);
736: get_u32s(data, (u32 *) ad->data, len / 4);
737: bgp_set_attr_ptr(to, s->pool, BA_EXT_COMMUNITY, flags, ad);
738: }
739:
740:
741: static void
742: bgp_decode_as4_aggregator(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
743: {
744: if (s->as4_session)
745: DISCARD(NEW_BGP, "AS4_AGGREGATOR");
746:
747: if (len != 8)
748: DISCARD(BAD_LENGTH, "AS4_AGGREGATOR", len);
749:
750: bgp_set_attr_data(to, s->pool, BA_AS4_AGGREGATOR, flags, data, len);
751: }
752:
753: static void
754: bgp_decode_as4_path(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
755: {
756: char err[128];
757:
758: if (s->as4_session)
759: DISCARD(NEW_BGP, "AS4_PATH");
760:
761: if (len < 6)
762: DISCARD(BAD_LENGTH, "AS4_PATH", len);
763:
764: if (!as_path_valid(data, len, 4, 1, err, sizeof(err)))
765: DISCARD("Malformed AS4_PATH attribute - %s", err);
766:
767: struct adata *a = lp_alloc_adata(s->pool, len);
768: memcpy(a->data, data, len);
769:
770: /* AS_CONFED* segments are invalid in AS4_PATH; RFC 6793 6 */
771: if (as_path_contains_confed(a))
772: {
773: REPORT("Discarding AS_CONFED* segment from AS4_PATH attribute");
774: a = as_path_strip_confed(s->pool, a);
775: }
776:
777: bgp_set_attr_ptr(to, s->pool, BA_AS4_PATH, flags, a);
778: }
779:
780:
781: static void
782: bgp_export_aigp(struct bgp_export_state *s, eattr *a)
783: {
784: if (!s->channel->cf->aigp)
785: UNSET(a);
786: }
787:
788: static void
789: bgp_decode_aigp(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
790: {
791: char err[128];
792:
793: /* Acceptability test postponed to bgp_finish_attrs() */
794:
795: if ((flags ^ bgp_attr_table[BA_AIGP].flags) & (BAF_OPTIONAL | BAF_TRANSITIVE))
796: DISCARD("Malformed AIGP attribute - conflicting flags (%02x)", flags);
797:
798: if (!bgp_aigp_valid(data, len, err, sizeof(err)))
799: DISCARD("Malformed AIGP attribute - %s", err);
800:
801: bgp_set_attr_data(to, s->pool, BA_AIGP, flags, data, len);
802: }
803:
804: static void
805: bgp_format_aigp(eattr *a, byte *buf, uint size UNUSED)
806: {
807: const byte *b = bgp_aigp_get_tlv(a->u.ptr, BGP_AIGP_METRIC);
808:
809: if (!b)
810: bsprintf(buf, "?");
811: else
812: bsprintf(buf, "%lu", get_u64(b + 3));
813: }
814:
815:
816: static void
817: bgp_export_large_community(struct bgp_export_state *s, eattr *a)
818: {
819: if (a->u.ptr->length == 0)
820: UNSET(a);
821:
822: a->u.ptr = lc_set_sort(s->pool, a->u.ptr);
823: }
824:
825: static void
826: bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint flags, byte *data, uint len, ea_list **to)
827: {
828: if (!len || (len % 12))
829: WITHDRAW(BAD_LENGTH, "LARGE_COMMUNITY", len);
830:
831: struct adata *ad = lp_alloc_adata(s->pool, len);
832: get_u32s(data, (u32 *) ad->data, len / 4);
833: bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
834: }
835:
836: static void
837: bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
838: {
839: net_addr *n = s->route->net->n.addr;
840: u32 *labels = (u32 *) a->u.ptr->data;
841: uint lnum = a->u.ptr->length / 4;
842:
843: /* Perhaps we should just ignore it? */
844: if (!s->mpls)
845: WITHDRAW("Unexpected MPLS stack");
846:
847: /* Empty MPLS stack is not allowed */
848: if (!lnum)
849: WITHDRAW("Malformed MPLS stack - empty");
850:
851: /* This is ugly, but we must ensure that labels fit into NLRI field */
852: if ((24*lnum + (net_is_vpn(n) ? 64 : 0) + net_pxlen(n)) > 255)
853: WITHDRAW("Malformed MPLS stack - too many labels (%u)", lnum);
854:
855: for (uint i = 0; i < lnum; i++)
856: {
857: if (labels[i] > 0xfffff)
858: WITHDRAW("Malformed MPLS stack - invalid label (%u)", labels[i]);
859:
860: /* TODO: Check for special-purpose label values? */
861: }
862: }
863:
864: static int
865: bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
866: {
867: /*
868: * MPLS labels are encoded as a part of the NLRI in MP_REACH_NLRI attribute,
869: * so we store MPLS_LABEL_STACK and encode it later by AFI-specific hooks.
870: */
871:
872: s->mpls_labels = a->u.ptr;
873: return 0;
874: }
875:
876: static void
877: bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
878: {
879: DISCARD("Discarding received attribute #0");
880: }
881:
882: static void
883: bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
884: {
885: u32 *labels = (u32 *) a->u.ptr->data;
886: uint lnum = a->u.ptr->length / 4;
887: char *pos = buf;
888:
889: for (uint i = 0; i < lnum; i++)
890: {
891: if (size < 20)
892: {
893: bsprintf(pos, "...");
894: return;
895: }
896:
897: uint l = bsprintf(pos, "%d/", labels[i]);
898: ADVANCE(pos, size, l);
899: }
900:
901: /* Clear last slash or terminate empty string */
902: pos[lnum ? -1 : 0] = 0;
903: }
904:
905: static inline void
906: bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
907: {
908: /* Cannot use bgp_set_attr_data() as it works on known attributes only */
909: ea_set_attr_data(to, s->pool, EA_CODE(PROTOCOL_BGP, code), flags, EAF_TYPE_OPAQUE, data, len);
910: }
911:
912:
913: /*
914: * Attribute table
915: */
916:
917: static const struct bgp_attr_desc bgp_attr_table[] = {
918: [BA_ORIGIN] = {
919: .name = "origin",
920: .type = EAF_TYPE_INT,
921: .flags = BAF_TRANSITIVE,
922: .export = bgp_export_origin,
923: .encode = bgp_encode_u8,
924: .decode = bgp_decode_origin,
925: .format = bgp_format_origin,
926: },
927: [BA_AS_PATH] = {
928: .name = "as_path",
929: .type = EAF_TYPE_AS_PATH,
930: .flags = BAF_TRANSITIVE,
931: .encode = bgp_encode_as_path,
932: .decode = bgp_decode_as_path,
933: },
934: [BA_NEXT_HOP] = {
935: .name = "next_hop",
936: .type = EAF_TYPE_IP_ADDRESS,
937: .flags = BAF_TRANSITIVE,
938: .encode = bgp_encode_next_hop,
939: .decode = bgp_decode_next_hop,
940: .format = bgp_format_next_hop,
941: },
942: [BA_MULTI_EXIT_DISC] = {
943: .name = "med",
944: .type = EAF_TYPE_INT,
945: .flags = BAF_OPTIONAL,
946: .encode = bgp_encode_u32,
947: .decode = bgp_decode_med,
948: },
949: [BA_LOCAL_PREF] = {
950: .name = "local_pref",
951: .type = EAF_TYPE_INT,
952: .flags = BAF_TRANSITIVE,
953: .export = bgp_export_local_pref,
954: .encode = bgp_encode_u32,
955: .decode = bgp_decode_local_pref,
956: },
957: [BA_ATOMIC_AGGR] = {
958: .name = "atomic_aggr",
959: .type = EAF_TYPE_OPAQUE,
960: .flags = BAF_TRANSITIVE,
961: .encode = bgp_encode_raw,
962: .decode = bgp_decode_atomic_aggr,
963: },
964: [BA_AGGREGATOR] = {
965: .name = "aggregator",
966: .type = EAF_TYPE_OPAQUE,
967: .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
968: .encode = bgp_encode_aggregator,
969: .decode = bgp_decode_aggregator,
970: .format = bgp_format_aggregator,
971: },
972: [BA_COMMUNITY] = {
973: .name = "community",
974: .type = EAF_TYPE_INT_SET,
975: .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
976: .export = bgp_export_community,
977: .encode = bgp_encode_u32s,
978: .decode = bgp_decode_community,
979: },
980: [BA_ORIGINATOR_ID] = {
981: .name = "originator_id",
982: .type = EAF_TYPE_ROUTER_ID,
983: .flags = BAF_OPTIONAL,
984: .export = bgp_export_originator_id,
985: .encode = bgp_encode_u32,
986: .decode = bgp_decode_originator_id,
987: },
988: [BA_CLUSTER_LIST] = {
989: .name = "cluster_list",
990: .type = EAF_TYPE_INT_SET,
991: .flags = BAF_OPTIONAL,
992: .export = bgp_export_cluster_list,
993: .encode = bgp_encode_u32s,
994: .decode = bgp_decode_cluster_list,
995: .format = bgp_format_cluster_list,
996: },
997: [BA_MP_REACH_NLRI] = {
998: .name = "mp_reach_nlri",
999: .type = EAF_TYPE_OPAQUE,
1000: .flags = BAF_OPTIONAL,
1001: .decode = bgp_decode_mp_reach_nlri,
1002: },
1003: [BA_MP_UNREACH_NLRI] = {
1004: .name = "mp_unreach_nlri",
1005: .type = EAF_TYPE_OPAQUE,
1006: .flags = BAF_OPTIONAL,
1007: .decode = bgp_decode_mp_unreach_nlri,
1008: },
1009: [BA_EXT_COMMUNITY] = {
1010: .name = "ext_community",
1011: .type = EAF_TYPE_EC_SET,
1012: .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1013: .export = bgp_export_ext_community,
1014: .encode = bgp_encode_u32s,
1015: .decode = bgp_decode_ext_community,
1016: },
1017: [BA_AS4_PATH] = {
1018: .name = "as4_path",
1019: .type = EAF_TYPE_AS_PATH,
1020: .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1021: .encode = bgp_encode_raw,
1022: .decode = bgp_decode_as4_path,
1023: },
1024: [BA_AS4_AGGREGATOR] = {
1025: .name = "as4_aggregator",
1026: .type = EAF_TYPE_OPAQUE,
1027: .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1028: .encode = bgp_encode_raw,
1029: .decode = bgp_decode_as4_aggregator,
1030: .format = bgp_format_aggregator,
1031: },
1032: [BA_AIGP] = {
1033: .name = "aigp",
1034: .type = EAF_TYPE_OPAQUE,
1035: .flags = BAF_OPTIONAL | BAF_DECODE_FLAGS,
1036: .export = bgp_export_aigp,
1037: .encode = bgp_encode_raw,
1038: .decode = bgp_decode_aigp,
1039: .format = bgp_format_aigp,
1040: },
1041: [BA_LARGE_COMMUNITY] = {
1042: .name = "large_community",
1043: .type = EAF_TYPE_LC_SET,
1044: .flags = BAF_OPTIONAL | BAF_TRANSITIVE,
1045: .export = bgp_export_large_community,
1046: .encode = bgp_encode_u32s,
1047: .decode = bgp_decode_large_community,
1048: },
1049: [BA_MPLS_LABEL_STACK] = {
1050: .name = "mpls_label_stack",
1051: .type = EAF_TYPE_INT_SET,
1052: .export = bgp_export_mpls_label_stack,
1053: .encode = bgp_encode_mpls_label_stack,
1054: .decode = bgp_decode_mpls_label_stack,
1055: .format = bgp_format_mpls_label_stack,
1056: },
1057: };
1058:
1059: static inline int
1060: bgp_attr_known(uint code)
1061: {
1062: return (code < ARRAY_SIZE(bgp_attr_table)) && bgp_attr_table[code].name;
1063: }
1064:
1065:
1066: /*
1067: * Attribute export
1068: */
1069:
1070: static inline void
1071: bgp_export_attr(struct bgp_export_state *s, eattr *a, ea_list *to)
1072: {
1073: if (EA_PROTO(a->id) != PROTOCOL_BGP)
1074: return;
1075:
1076: uint code = EA_ID(a->id);
1077:
1078: if (bgp_attr_known(code))
1079: {
1080: const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1081:
1082: /* The flags might have been zero if the attr was added by filters */
1083: a->flags = (a->flags & BAF_PARTIAL) | desc->flags;
1084:
1085: /* Set partial bit if new opt-trans attribute is attached to non-local route */
1086: if ((s->src != NULL) && (a->type & EAF_ORIGINATED) &&
1087: (a->flags & BAF_OPTIONAL) && (a->flags & BAF_TRANSITIVE))
1088: a->flags |= BAF_PARTIAL;
1089:
1090: /* Call specific hook */
1091: CALL(desc->export, s, a);
1092:
1093: /* Attribute might become undefined in hook */
1094: if ((a->type & EAF_TYPE_MASK) == EAF_TYPE_UNDEF)
1095: return;
1096: }
1097: else
1098: {
1099: /* Don't re-export unknown non-transitive attributes */
1100: if (!(a->flags & BAF_TRANSITIVE))
1101: return;
1102:
1103: a->flags |= BAF_PARTIAL;
1104: }
1105:
1106: /* Append updated attribute */
1107: to->attrs[to->count++] = *a;
1108: }
1109:
1110: /**
1111: * bgp_export_attrs - export BGP attributes
1112: * @s: BGP export state
1113: * @attrs: a list of extended attributes
1114: *
1115: * The bgp_export_attrs() function takes a list of attributes and merges it to
1116: * one newly allocated and sorted segment. Attributes are validated and
1117: * normalized by type-specific export hooks and attribute flags are updated.
1118: * Some attributes may be eliminated (e.g. unknown non-tranitive attributes, or
1119: * empty community sets).
1120: *
1121: * Result: one sorted attribute list segment, or NULL if attributes are unsuitable.
1122: */
1123: static inline ea_list *
1124: bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
1125: {
1126: /* Merge the attribute list */
1127: ea_list *new = lp_alloc(s->pool, ea_scan(attrs));
1128: ea_merge(attrs, new);
1129: ea_sort(new);
1130:
1131: uint i, count;
1132: count = new->count;
1133: new->count = 0;
1134:
1135: /* Export each attribute */
1136: for (i = 0; i < count; i++)
1137: bgp_export_attr(s, &new->attrs[i], new);
1138:
1139: if (s->err_withdraw)
1140: return NULL;
1141:
1142: return new;
1143: }
1144:
1145:
1146: /*
1147: * Attribute encoding
1148: */
1149:
1150: static inline int
1151: bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
1152: {
1153: ASSERT(EA_PROTO(a->id) == PROTOCOL_BGP);
1154:
1155: uint code = EA_ID(a->id);
1156:
1157: if (bgp_attr_known(code))
1158: return bgp_attr_table[code].encode(s, a, buf, size);
1159: else
1160: return bgp_encode_raw(s, a, buf, size);
1161: }
1162:
1163: /**
1164: * bgp_encode_attrs - encode BGP attributes
1165: * @s: BGP write state
1166: * @attrs: a list of extended attributes
1167: * @buf: buffer
1168: * @end: buffer end
1169: *
1170: * The bgp_encode_attrs() function takes a list of extended attributes
1171: * and converts it to its BGP representation (a part of an Update message).
1172: * BGP write state may be fake when called from MRT protocol.
1173: *
1174: * Result: Length of the attribute block generated or -1 if not enough space.
1175: */
1176: int
1177: bgp_encode_attrs(struct bgp_write_state *s, ea_list *attrs, byte *buf, byte *end)
1178: {
1179: byte *pos = buf;
1180: int i, len;
1181:
1182: for (i = 0; i < attrs->count; i++)
1183: {
1184: len = bgp_encode_attr(s, &attrs->attrs[i], pos, end - pos);
1185:
1186: if (len < 0)
1187: return -1;
1188:
1189: pos += len;
1190: }
1191:
1192: return pos - buf;
1193: }
1194:
1195:
1196: /*
1197: * Attribute decoding
1198: */
1199:
1200: static void bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool);
1201:
1202: static inline int
1203: bgp_as_path_loopy(struct bgp_proto *p, ea_list *attrs, u32 asn)
1204: {
1205: eattr *e = bgp_find_attr(attrs, BA_AS_PATH);
1206: int num = p->cf->allow_local_as + 1;
1207: return (e && (num > 0) && as_path_contains(e->u.ptr, asn, num));
1208: }
1209:
1210: static inline int
1211: bgp_originator_id_loopy(struct bgp_proto *p, ea_list *attrs)
1212: {
1213: eattr *e = bgp_find_attr(attrs, BA_ORIGINATOR_ID);
1214: return (e && (e->u.data == p->local_id));
1215: }
1216:
1217: static inline int
1218: bgp_cluster_list_loopy(struct bgp_proto *p, ea_list *attrs)
1219: {
1220: eattr *e = bgp_find_attr(attrs, BA_CLUSTER_LIST);
1221: return (e && int_set_contains(e->u.ptr, p->rr_cluster_id));
1222: }
1223:
1224: static inline void
1225: bgp_decode_attr(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
1226: {
1227: /* Handle duplicate attributes; RFC 7606 3 (g) */
1228: if (BIT32_TEST(s->attrs_seen, code))
1229: {
1230: if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
1231: bgp_parse_error(s, 1);
1232: else
1233: DISCARD("Discarding duplicate attribute (code %u)", code);
1234: }
1235: BIT32_SET(s->attrs_seen, code);
1236:
1237: if (bgp_attr_known(code))
1238: {
1239: const struct bgp_attr_desc *desc = &bgp_attr_table[code];
1240:
1241: /* Handle conflicting flags; RFC 7606 3 (c) */
1242: if (((flags ^ desc->flags) & (BAF_OPTIONAL | BAF_TRANSITIVE)) &&
1243: !(desc->flags & BAF_DECODE_FLAGS))
1244: WITHDRAW("Malformed %s attribute - conflicting flags (%02x)", desc->name, flags);
1245:
1246: desc->decode(s, code, flags, data, len, to);
1247: }
1248: else /* Unknown attribute */
1249: {
1250: if (!(flags & BAF_OPTIONAL))
1251: WITHDRAW("Unknown attribute (code %u) - conflicting flags (%02x)", code, flags);
1252:
1253: bgp_decode_unknown(s, code, flags, data, len, to);
1254: }
1255: }
1256:
1257: /**
1258: * bgp_decode_attrs - check and decode BGP attributes
1259: * @s: BGP parse state
1260: * @data: start of attribute block
1261: * @len: length of attribute block
1262: *
1263: * This function takes a BGP attribute block (a part of an Update message), checks
1264: * its consistency and converts it to a list of BIRD route attributes represented
1265: * by an (uncached) &rta.
1266: */
1267: ea_list *
1268: bgp_decode_attrs(struct bgp_parse_state *s, byte *data, uint len)
1269: {
1270: struct bgp_proto *p = s->proto;
1271: ea_list *attrs = NULL;
1272: uint code, flags, alen;
1273: byte *pos = data;
1274:
1275: /* Parse the attributes */
1276: while (len)
1277: {
1278: alen = 0;
1279:
1280: /* Read attribute type */
1281: if (len < 2)
1282: goto framing_error;
1283: flags = pos[0];
1284: code = pos[1];
1285: ADVANCE(pos, len, 2);
1286:
1287: /* Read attribute length */
1288: if (flags & BAF_EXT_LEN)
1289: {
1290: if (len < 2)
1291: goto framing_error;
1292: alen = get_u16(pos);
1293: ADVANCE(pos, len, 2);
1294: }
1295: else
1296: {
1297: if (len < 1)
1298: goto framing_error;
1299: alen = *pos;
1300: ADVANCE(pos, len, 1);
1301: }
1302:
1303: if (alen > len)
1304: goto framing_error;
1305:
1306: DBG("Attr %02x %02x %u\n", code, flags, alen);
1307:
1308: bgp_decode_attr(s, code, flags, pos, alen, &attrs);
1309: ADVANCE(pos, len, alen);
1310: }
1311:
1312: if (s->err_withdraw)
1313: goto withdraw;
1314:
1315: /* If there is no reachability NLRI, we are finished */
1316: if (!s->ip_reach_len && !s->mp_reach_len)
1317: return NULL;
1318:
1319:
1320: /* Handle missing mandatory attributes; RFC 7606 3 (d) */
1321: if (!BIT32_TEST(s->attrs_seen, BA_ORIGIN))
1322: { REPORT(NO_MANDATORY, "ORIGIN"); goto withdraw; }
1323:
1324: if (!BIT32_TEST(s->attrs_seen, BA_AS_PATH))
1325: { REPORT(NO_MANDATORY, "AS_PATH"); goto withdraw; }
1326:
1327: if (s->ip_reach_len && !BIT32_TEST(s->attrs_seen, BA_NEXT_HOP))
1328: { REPORT(NO_MANDATORY, "NEXT_HOP"); goto withdraw; }
1329:
1330: /* When receiving attributes from non-AS4-aware BGP speaker, we have to
1331: reconstruct AS_PATH and AGGREGATOR attributes; RFC 6793 4.2.3 */
1332: if (!p->as4_session)
1333: bgp_process_as4_attrs(&attrs, s->pool);
1334:
1335: /* Reject routes with our ASN in AS_PATH attribute */
1336: if (bgp_as_path_loopy(p, attrs, p->local_as))
1337: goto withdraw;
1338:
1339: /* Reject routes with our Confederation ID in AS_PATH attribute; RFC 5065 4.0 */
1340: if ((p->public_as != p->local_as) && bgp_as_path_loopy(p, attrs, p->public_as))
1341: goto withdraw;
1342:
1343: /* Reject routes with our Router ID in ORIGINATOR_ID attribute; RFC 4456 8 */
1344: if (p->is_internal && bgp_originator_id_loopy(p, attrs))
1345: goto withdraw;
1346:
1347: /* Reject routes with our Cluster ID in CLUSTER_LIST attribute; RFC 4456 8 */
1348: if (p->rr_client && bgp_cluster_list_loopy(p, attrs))
1349: goto withdraw;
1350:
1351: /* If there is no local preference, define one */
1352: if (!BIT32_TEST(s->attrs_seen, BA_LOCAL_PREF))
1353: bgp_set_attr_u32(&attrs, s->pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1354:
1355: return attrs;
1356:
1357:
1358: framing_error:
1359: /* RFC 7606 4 - handle attribute framing errors */
1360: REPORT("Malformed attribute list - framing error (%u/%u) at %d",
1361: alen, len, (int) (pos - s->attrs));
1362:
1363: withdraw:
1364: /* RFC 7606 5.2 - handle missing NLRI during errors */
1365: if (!s->ip_reach_len && !s->mp_reach_len)
1366: bgp_parse_error(s, 1);
1367:
1368: s->err_withdraw = 1;
1369: return NULL;
1370: }
1371:
1372: void
1373: bgp_finish_attrs(struct bgp_parse_state *s, rta *a)
1374: {
1375: /* AIGP test here instead of in bgp_decode_aigp() - we need to know channel */
1376: if (BIT32_TEST(s->attrs_seen, BA_AIGP) && !s->channel->cf->aigp)
1377: {
1378: REPORT("Discarding AIGP attribute received on non-AIGP session");
1379: bgp_unset_attr(&a->eattrs, s->pool, BA_AIGP);
1380: }
1381: }
1382:
1383:
1384: /*
1385: * Route bucket hash table
1386: */
1387:
/*
 * Parameters of the route bucket hash (prefix RBH) used by the HASH_*()
 * macros below. A bucket is keyed by its attribute list together with the
 * precomputed hash value; two keys are equal when the hashes match and
 * ea_same() agrees on the lists. The hash function simply returns the
 * stored hash, and buckets are chained through their next field.
 *
 * NOTE(review): the meaning of the individual RBH_PARAMS values and the exact
 * function generated by HASH_DEFINE_REHASH_FN() are defined by the generic
 * hash implementation, which is not visible here - confirm before tuning.
 */
1388: #define RBH_KEY(b) b->eattrs, b->hash
1389: #define RBH_NEXT(b) b->next
1390: #define RBH_EQ(a1,h1,a2,h2) h1 == h2 && ea_same(a1, a2)
1391: #define RBH_FN(a,h) h
1392:
1393: #define RBH_REHASH bgp_rbh_rehash
1394: #define RBH_PARAMS /8, *2, 2, 2, 8, 20
1395:
1396:
1397: HASH_DEFINE_REHASH_FN(RBH, struct bgp_bucket)
1398:
1399: void
1400: bgp_init_bucket_table(struct bgp_channel *c)
1401: {
1402: HASH_INIT(c->bucket_hash, c->pool, 8);
1403:
1404: init_list(&c->bucket_queue);
1405: c->withdraw_bucket = NULL;
1406: }
1407:
1408: void
1409: bgp_free_bucket_table(struct bgp_channel *c)
1410: {
1411: HASH_FREE(c->bucket_hash);
1412:
1413: struct bgp_bucket *b;
1414: WALK_LIST_FIRST(b, c->bucket_queue)
1415: {
1416: rem_node(&b->send_node);
1417: mb_free(b);
1418: }
1419:
1420: mb_free(c->withdraw_bucket);
1421: c->withdraw_bucket = NULL;
1422: }
1423:
1424: static struct bgp_bucket *
1425: bgp_get_bucket(struct bgp_channel *c, ea_list *new)
1426: {
1427: /* Hash and lookup */
1428: u32 hash = ea_hash(new);
1429: struct bgp_bucket *b = HASH_FIND(c->bucket_hash, RBH, new, hash);
1430:
1431: if (b)
1432: return b;
1433:
1434: uint ea_size = sizeof(ea_list) + new->count * sizeof(eattr);
1435: uint ea_size_aligned = BIRD_ALIGN(ea_size, CPU_STRUCT_ALIGN);
1436: uint size = sizeof(struct bgp_bucket) + ea_size_aligned;
1437: uint i;
1438: byte *dest;
1439:
1440: /* Gather total size of non-inline attributes */
1441: for (i = 0; i < new->count; i++)
1442: {
1443: eattr *a = &new->attrs[i];
1444:
1445: if (!(a->type & EAF_EMBEDDED))
1446: size += BIRD_ALIGN(sizeof(struct adata) + a->u.ptr->length, CPU_STRUCT_ALIGN);
1447: }
1448:
1449: /* Create the bucket */
1450: b = mb_alloc(c->pool, size);
1451: init_list(&b->prefixes);
1452: b->hash = hash;
1453:
1454: /* Copy list of extended attributes */
1455: memcpy(b->eattrs, new, ea_size);
1456: dest = ((byte *) b->eattrs) + ea_size_aligned;
1457:
1458: /* Copy values of non-inline attributes */
1459: for (i = 0; i < new->count; i++)
1460: {
1461: eattr *a = &b->eattrs->attrs[i];
1462:
1463: if (!(a->type & EAF_EMBEDDED))
1464: {
1465: const struct adata *oa = a->u.ptr;
1466: struct adata *na = (struct adata *) dest;
1467: memcpy(na, oa, sizeof(struct adata) + oa->length);
1468: a->u.ptr = na;
1469: dest += BIRD_ALIGN(sizeof(struct adata) + na->length, CPU_STRUCT_ALIGN);
1470: }
1471: }
1472:
1473: /* Insert the bucket to send queue and bucket hash */
1474: add_tail(&c->bucket_queue, &b->send_node);
1475: HASH_INSERT2(c->bucket_hash, RBH, c->pool, b);
1476:
1477: return b;
1478: }
1479:
1480: static struct bgp_bucket *
1481: bgp_get_withdraw_bucket(struct bgp_channel *c)
1482: {
1483: if (!c->withdraw_bucket)
1484: {
1485: c->withdraw_bucket = mb_allocz(c->pool, sizeof(struct bgp_bucket));
1486: init_list(&c->withdraw_bucket->prefixes);
1487: }
1488:
1489: return c->withdraw_bucket;
1490: }
1491:
1492: void
1493: bgp_free_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1494: {
1495: rem_node(&b->send_node);
1496: HASH_REMOVE2(c->bucket_hash, RBH, c->pool, b);
1497: mb_free(b);
1498: }
1499:
1500: void
1501: bgp_defer_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1502: {
1503: rem_node(&b->send_node);
1504: add_tail(&c->bucket_queue, &b->send_node);
1505: }
1506:
1507: void
1508: bgp_withdraw_bucket(struct bgp_channel *c, struct bgp_bucket *b)
1509: {
1510: struct bgp_proto *p = (void *) c->c.proto;
1511: struct bgp_bucket *wb = bgp_get_withdraw_bucket(c);
1512:
1513: log(L_ERR "%s: Attribute list too long", p->p.name);
1514: while (!EMPTY_LIST(b->prefixes))
1515: {
1516: struct bgp_prefix *px = HEAD(b->prefixes);
1517:
1518: log(L_ERR "%s: - withdrawing %N", p->p.name, &px->net);
1519: rem_node(&px->buck_node);
1520: add_tail(&wb->prefixes, &px->buck_node);
1521: }
1522: }
1523:
1524:
1525: /*
1526: * Prefix hash table
1527: */
1528:
/*
 * Parameters of the prefix hash (prefix PXH) used by the HASH_*() macros
 * below. A prefix is keyed by its network, path ID and precomputed hash
 * value; two keys are equal when hashes and path IDs match and net_equal()
 * agrees on the networks. The hash function simply returns the stored hash,
 * and entries are chained through their next field.
 *
 * NOTE(review): the meaning of the individual PXH_PARAMS values and the exact
 * function generated by HASH_DEFINE_REHASH_FN() are defined by the generic
 * hash implementation, which is not visible here - confirm before tuning.
 */
1529: #define PXH_KEY(px) px->net, px->path_id, px->hash
1530: #define PXH_NEXT(px) px->next
1531: #define PXH_EQ(n1,i1,h1,n2,i2,h2) h1 == h2 && i1 == i2 && net_equal(n1, n2)
1532: #define PXH_FN(n,i,h) h
1533:
1534: #define PXH_REHASH bgp_pxh_rehash
1535: #define PXH_PARAMS /8, *2, 2, 2, 8, 24
1536:
1537:
1538: HASH_DEFINE_REHASH_FN(PXH, struct bgp_prefix)
1539:
1540: void
1541: bgp_init_prefix_table(struct bgp_channel *c)
1542: {
1543: HASH_INIT(c->prefix_hash, c->pool, 8);
1544:
1545: uint alen = net_addr_length[c->c.net_type];
1546: c->prefix_slab = alen ? sl_new(c->pool, sizeof(struct bgp_prefix) + alen) : NULL;
1547: }
1548:
1549: void
1550: bgp_free_prefix_table(struct bgp_channel *c)
1551: {
1552: HASH_FREE(c->prefix_hash);
1553:
1554: rfree(c->prefix_slab);
1555: c->prefix_slab = NULL;
1556: }
1557:
1558: static struct bgp_prefix *
1559: bgp_get_prefix(struct bgp_channel *c, net_addr *net, u32 path_id)
1560: {
1561: u32 hash = net_hash(net) ^ u32_hash(path_id);
1562: struct bgp_prefix *px = HASH_FIND(c->prefix_hash, PXH, net, path_id, hash);
1563:
1564: if (px)
1565: {
1566: rem_node(&px->buck_node);
1567: return px;
1568: }
1569:
1570: if (c->prefix_slab)
1571: px = sl_alloc(c->prefix_slab);
1572: else
1573: px = mb_alloc(c->pool, sizeof(struct bgp_prefix) + net->length);
1574:
1575: px->buck_node.next = NULL;
1576: px->buck_node.prev = NULL;
1577: px->hash = hash;
1578: px->path_id = path_id;
1579: net_copy(px->net, net);
1580:
1581: HASH_INSERT2(c->prefix_hash, PXH, c->pool, px);
1582:
1583: return px;
1584: }
1585:
1586: void
1587: bgp_free_prefix(struct bgp_channel *c, struct bgp_prefix *px)
1588: {
1589: rem_node(&px->buck_node);
1590: HASH_REMOVE2(c->prefix_hash, PXH, c->pool, px);
1591:
1592: if (c->prefix_slab)
1593: sl_free(c->prefix_slab, px);
1594: else
1595: mb_free(px);
1596: }
1597:
1598:
1599: /*
1600: * BGP protocol glue
1601: */
1602:
1603: int
1604: bgp_preexport(struct proto *P, rte **new, struct linpool *pool UNUSED)
1605: {
1606: rte *e = *new;
1607: struct proto *SRC = e->attrs->src->proto;
1608: struct bgp_proto *p = (struct bgp_proto *) P;
1609: struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (struct bgp_proto *) SRC : NULL;
1610:
1611: /* Reject our routes */
1612: if (src == p)
1613: return -1;
1614:
1615: /* Accept non-BGP routes */
1616: if (src == NULL)
1617: return 0;
1618:
1619: /* IBGP route reflection, RFC 4456 */
1620: if (p->is_internal && src->is_internal && (p->local_as == src->local_as))
1621: {
1622: /* Rejected unless configured as route reflector */
1623: if (!p->rr_client && !src->rr_client)
1624: return -1;
1625:
1626: /* Generally, this should be handled when path is received, but we check it
1627: also here as rr_cluster_id may be undefined or different in src. */
1628: if (p->rr_cluster_id && bgp_cluster_list_loopy(p, e->attrs->eattrs))
1629: return -1;
1630: }
1631:
1632: /* Handle well-known communities, RFC 1997 */
1633: struct eattr *c;
1634: if (p->cf->interpret_communities &&
1635: (c = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY))))
1636: {
1637: const struct adata *d = c->u.ptr;
1638:
1639: /* Do not export anywhere */
1640: if (int_set_contains(d, BGP_COMM_NO_ADVERTISE))
1641: return -1;
1642:
1643: /* Do not export outside of AS (or member-AS) */
1644: if (!p->is_internal && int_set_contains(d, BGP_COMM_NO_EXPORT_SUBCONFED))
1645: return -1;
1646:
1647: /* Do not export outside of AS (or confederation) */
1648: if (!p->is_interior && int_set_contains(d, BGP_COMM_NO_EXPORT))
1649: return -1;
1650:
1651: /* Do not export LLGR_STALE routes to LLGR-ignorant peers */
1652: if (!p->conn->remote_caps->llgr_aware && int_set_contains(d, BGP_COMM_LLGR_STALE))
1653: return -1;
1654: }
1655:
1656: return 0;
1657: }
1658:
1659: static ea_list *
1660: bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *attrs0, struct linpool *pool)
1661: {
1662: struct proto *SRC = e->attrs->src->proto;
1663: struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
1664: struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
1665: ea_list *attrs = attrs0;
1666: eattr *a;
1667: const adata *ad;
1668:
1669: /* ORIGIN attribute - mandatory, attach if missing */
1670: if (! bgp_find_attr(attrs0, BA_ORIGIN))
1671: bgp_set_attr_u32(&attrs, pool, BA_ORIGIN, 0, src ? ORIGIN_INCOMPLETE : ORIGIN_IGP);
1672:
1673: /* AS_PATH attribute - mandatory */
1674: a = bgp_find_attr(attrs0, BA_AS_PATH);
1675: ad = a ? a->u.ptr : &null_adata;
1676:
1677: /* AS_PATH attribute - strip AS_CONFED* segments outside confederation */
1678: if ((!p->cf->confederation || !p->is_interior) && as_path_contains_confed(ad))
1679: ad = as_path_strip_confed(pool, ad);
1680:
1681: /* AS_PATH attribute - keep or prepend ASN */
1682: if (p->is_internal || p->rs_client)
1683: {
1684: /* IBGP or route server -> just ensure there is one */
1685: if (!a)
1686: bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, &null_adata);
1687: }
1688: else if (p->is_interior)
1689: {
1690: /* Confederation -> prepend ASN as AS_CONFED_SEQUENCE */
1691: ad = as_path_prepend2(pool, ad, AS_PATH_CONFED_SEQUENCE, p->public_as);
1692: bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1693: }
1694: else /* Regular EBGP (no RS, no confederation) */
1695: {
1696: /* Regular EBGP -> prepend ASN as regular sequence */
1697: ad = as_path_prepend2(pool, ad, AS_PATH_SEQUENCE, p->public_as);
1698: bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, ad);
1699:
1700: /* MULTI_EXIT_DESC attribute - accept only if set in export filter */
1701: a = bgp_find_attr(attrs0, BA_MULTI_EXIT_DISC);
1702: if (a && !(a->type & EAF_FRESH))
1703: bgp_unset_attr(&attrs, pool, BA_MULTI_EXIT_DISC);
1704: }
1705:
1706: /* NEXT_HOP attribute - delegated to AF-specific hook */
1707: a = bgp_find_attr(attrs0, BA_NEXT_HOP);
1708: bgp_update_next_hop(&s, a, &attrs);
1709:
1710: /* LOCAL_PREF attribute - required for IBGP, attach if missing */
1711: if (p->is_interior && ! bgp_find_attr(attrs0, BA_LOCAL_PREF))
1712: bgp_set_attr_u32(&attrs, pool, BA_LOCAL_PREF, 0, p->cf->default_local_pref);
1713:
1714: /* AIGP attribute - accumulate local metric or originate new one */
1715: u64 metric;
1716: if (s.local_next_hop &&
1717: (bgp_total_aigp_metric_(e, &metric, &ad) ||
1718: (c->cf->aigp_originate && bgp_init_aigp_metric(e, &metric, &ad))))
1719: {
1720: ad = bgp_aigp_set_metric(pool, ad, metric);
1721: bgp_set_attr_ptr(&attrs, pool, BA_AIGP, 0, ad);
1722: }
1723:
1724: /* IBGP route reflection, RFC 4456 */
1725: if (src && src->is_internal && p->is_internal && (src->local_as == p->local_as))
1726: {
1727: /* ORIGINATOR_ID attribute - attach if not already set */
1728: if (! bgp_find_attr(attrs0, BA_ORIGINATOR_ID))
1729: bgp_set_attr_u32(&attrs, pool, BA_ORIGINATOR_ID, 0, src->remote_id);
1730:
1731: /* CLUSTER_LIST attribute - prepend cluster ID */
1732: a = bgp_find_attr(attrs0, BA_CLUSTER_LIST);
1733: ad = a ? a->u.ptr : NULL;
1734:
1735: /* Prepend src cluster ID */
1736: if (src->rr_cluster_id)
1737: ad = int_set_prepend(pool, ad, src->rr_cluster_id);
1738:
1739: /* Prepend dst cluster ID if src and dst clusters are different */
1740: if (p->rr_cluster_id && (src->rr_cluster_id != p->rr_cluster_id))
1741: ad = int_set_prepend(pool, ad, p->rr_cluster_id);
1742:
1743: /* Should be at least one prepended cluster ID */
1744: bgp_set_attr_ptr(&attrs, pool, BA_CLUSTER_LIST, 0, ad);
1745: }
1746:
1747: /* AS4_* transition attributes, RFC 6793 4.2.2 */
1748: if (! p->as4_session)
1749: {
1750: a = bgp_find_attr(attrs, BA_AS_PATH);
1751: if (a && as_path_contains_as4(a->u.ptr))
1752: {
1753: bgp_set_attr_ptr(&attrs, pool, BA_AS_PATH, 0, as_path_to_old(pool, a->u.ptr));
1754: bgp_set_attr_ptr(&attrs, pool, BA_AS4_PATH, 0, as_path_strip_confed(pool, a->u.ptr));
1755: }
1756:
1757: a = bgp_find_attr(attrs, BA_AGGREGATOR);
1758: if (a && aggregator_contains_as4(a->u.ptr))
1759: {
1760: bgp_set_attr_ptr(&attrs, pool, BA_AGGREGATOR, 0, aggregator_to_old(pool, a->u.ptr));
1761: bgp_set_attr_ptr(&attrs, pool, BA_AS4_AGGREGATOR, 0, a->u.ptr);
1762: }
1763: }
1764:
1765: /*
1766: * Presence of mandatory attributes ORIGIN and AS_PATH is ensured by above
1767: * conditions. Presence and validity of quasi-mandatory NEXT_HOP attribute
1768: * should be checked in AF-specific hooks.
1769: */
1770:
1771: /* Apply per-attribute export hooks for validatation and normalization */
1772: return bgp_export_attrs(&s, attrs);
1773: }
1774:
1775: void
1776: bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old)
1777: {
1778: struct bgp_proto *p = (void *) P;
1779: struct bgp_channel *c = (void *) C;
1780: struct bgp_bucket *buck;
1781: struct bgp_prefix *px;
1782: u32 path;
1783:
1784: if (new)
1785: {
1786: struct ea_list *attrs = bgp_update_attrs(p, c, new, new->attrs->eattrs, bgp_linpool2);
1787:
1788: /* If attributes are invalid, we fail back to withdraw */
1789: buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
1790: path = new->attrs->src->global_id;
1791:
1792: lp_flush(bgp_linpool2);
1793: }
1794: else
1795: {
1796: buck = bgp_get_withdraw_bucket(c);
1797: path = old->attrs->src->global_id;
1798: }
1799:
1800: px = bgp_get_prefix(c, n->n.addr, c->add_path_tx ? path : 0);
1801: add_tail(&buck->prefixes, &px->buck_node);
1802:
1803: bgp_schedule_packet(p->conn, c, PKT_UPDATE);
1804: }
1805:
1806:
1807: static inline u32
1808: bgp_get_neighbor(rte *r)
1809: {
1810: eattr *e = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1811: u32 as;
1812:
1813: if (e && as_path_get_first_regular(e->u.ptr, &as))
1814: return as;
1815:
1816: /* If AS_PATH is not defined, we treat rte as locally originated */
1817: struct bgp_proto *p = (void *) r->attrs->src->proto;
1818: return p->cf->confederation ?: p->local_as;
1819: }
1820:
1821: static inline int
1822: rte_stale(rte *r)
1823: {
1824: if (r->u.bgp.stale < 0)
1825: {
1826: /* If staleness is unknown, compute and cache it */
1827: eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
1828: r->u.bgp.stale = a && int_set_contains(a->u.ptr, BGP_COMM_LLGR_STALE);
1829: }
1830:
1831: return r->u.bgp.stale;
1832: }
1833:
1834: int
1835: bgp_rte_better(rte *new, rte *old)
1836: {
1837: struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->src->proto;
1838: struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->src->proto;
1839: eattr *x, *y;
1840: u32 n, o;
1841:
1842: /* Skip suppressed routes (see bgp_rte_recalculate()) */
1843: n = new->u.bgp.suppressed;
1844: o = old->u.bgp.suppressed;
1845: if (n > o)
1846: return 0;
1847: if (n < o)
1848: return 1;
1849:
1850: /* RFC 4271 9.1.2.1. Route resolvability test */
1851: n = rte_resolvable(new);
1852: o = rte_resolvable(old);
1853: if (n > o)
1854: return 1;
1855: if (n < o)
1856: return 0;
1857:
1858: /* LLGR draft - depreference stale routes */
1859: n = rte_stale(new);
1860: o = rte_stale(old);
1861: if (n > o)
1862: return 0;
1863: if (n < o)
1864: return 1;
1865:
1866: /* Start with local preferences */
1867: x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1868: y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
1869: n = x ? x->u.data : new_bgp->cf->default_local_pref;
1870: o = y ? y->u.data : old_bgp->cf->default_local_pref;
1871: if (n > o)
1872: return 1;
1873: if (n < o)
1874: return 0;
1875:
1876: /* RFC 7311 4.1 - Apply AIGP metric */
1877: u64 n2 = bgp_total_aigp_metric(new);
1878: u64 o2 = bgp_total_aigp_metric(old);
1879: if (n2 < o2)
1880: return 1;
1881: if (n2 > o2)
1882: return 0;
1883:
1884: /* RFC 4271 9.1.2.2. a) Use AS path lengths */
1885: if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
1886: {
1887: x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1888: y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
1889: n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
1890: o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
1891: if (n < o)
1892: return 1;
1893: if (n > o)
1894: return 0;
1895: }
1896:
1897: /* RFC 4271 9.1.2.2. b) Use origins */
1898: x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1899: y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
1900: n = x ? x->u.data : ORIGIN_INCOMPLETE;
1901: o = y ? y->u.data : ORIGIN_INCOMPLETE;
1902: if (n < o)
1903: return 1;
1904: if (n > o)
1905: return 0;
1906:
1907: /* RFC 4271 9.1.2.2. c) Compare MED's */
1908: /* Proper RFC 4271 path selection cannot be interpreted as finding
1909: * the best path in some ordering. It is implemented partially in
1910: * bgp_rte_recalculate() when deterministic_med option is
1911: * active. Without that option, the behavior is just an
1912: * approximation, which in specific situations may lead to
1913: * persistent routing loops, because it is nondeterministic - it
1914: * depends on the order in which routes appeared. But it is also the
1915: * same behavior as used by default in Cisco routers, so it is
1916: * probably not a big issue.
1917: */
1918: if (new_bgp->cf->med_metric || old_bgp->cf->med_metric ||
1919: (bgp_get_neighbor(new) == bgp_get_neighbor(old)))
1920: {
1921: x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1922: y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
1923: n = x ? x->u.data : new_bgp->cf->default_med;
1924: o = y ? y->u.data : old_bgp->cf->default_med;
1925: if (n < o)
1926: return 1;
1927: if (n > o)
1928: return 0;
1929: }
1930:
1931: /* RFC 4271 9.1.2.2. d) Prefer external peers */
1932: if (new_bgp->is_interior > old_bgp->is_interior)
1933: return 0;
1934: if (new_bgp->is_interior < old_bgp->is_interior)
1935: return 1;
1936:
1937: /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
1938: n = new_bgp->cf->igp_metric ? new->attrs->igp_metric : 0;
1939: o = old_bgp->cf->igp_metric ? old->attrs->igp_metric : 0;
1940: if (n < o)
1941: return 1;
1942: if (n > o)
1943: return 0;
1944:
1945: /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
1946: /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighbor ID */
1947: x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1948: y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGINATOR_ID));
1949: n = x ? x->u.data : new_bgp->remote_id;
1950: o = y ? y->u.data : old_bgp->remote_id;
1951:
1952: /* RFC 5004 - prefer older routes */
1953: /* (if both are external and from different peer) */
1954: if ((new_bgp->cf->prefer_older || old_bgp->cf->prefer_older) &&
1955: !new_bgp->is_internal && n != o)
1956: return 0;
1957:
1958: /* rest of RFC 4271 9.1.2.2. f) */
1959: if (n < o)
1960: return 1;
1961: if (n > o)
1962: return 0;
1963:
1964: /* RFC 4456 9. b) Compare cluster list lengths */
1965: x = ea_find(new->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1966: y = ea_find(old->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_CLUSTER_LIST));
1967: n = x ? int_set_get_size(x->u.ptr) : 0;
1968: o = y ? int_set_get_size(y->u.ptr) : 0;
1969: if (n < o)
1970: return 1;
1971: if (n > o)
1972: return 0;
1973:
1974: /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
1975: return ipa_compare(new_bgp->remote_ip, old_bgp->remote_ip) < 0;
1976: }
1977:
1978:
1979: int
1980: bgp_rte_mergable(rte *pri, rte *sec)
1981: {
1982: struct bgp_proto *pri_bgp = (struct bgp_proto *) pri->attrs->src->proto;
1983: struct bgp_proto *sec_bgp = (struct bgp_proto *) sec->attrs->src->proto;
1984: eattr *x, *y;
1985: u32 p, s;
1986:
1987: /* Skip suppressed routes (see bgp_rte_recalculate()) */
1988: if (pri->u.bgp.suppressed != sec->u.bgp.suppressed)
1989: return 0;
1990:
1991: /* RFC 4271 9.1.2.1. Route resolvability test */
1992: if (rte_resolvable(pri) != rte_resolvable(sec))
1993: return 0;
1994:
1995: /* LLGR draft - depreference stale routes */
1996: if (rte_stale(pri) != rte_stale(sec))
1997: return 0;
1998:
1999: /* Start with local preferences */
2000: x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2001: y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_LOCAL_PREF));
2002: p = x ? x->u.data : pri_bgp->cf->default_local_pref;
2003: s = y ? y->u.data : sec_bgp->cf->default_local_pref;
2004: if (p != s)
2005: return 0;
2006:
2007: /* RFC 4271 9.1.2.2. a) Use AS path lengths */
2008: if (pri_bgp->cf->compare_path_lengths || sec_bgp->cf->compare_path_lengths)
2009: {
2010: x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2011: y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2012: p = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
2013: s = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
2014:
2015: if (p != s)
2016: return 0;
2017:
2018: // if (DELTA(p, s) > pri_bgp->cf->relax_multipath)
2019: // return 0;
2020: }
2021:
2022: /* RFC 4271 9.1.2.2. b) Use origins */
2023: x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2024: y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2025: p = x ? x->u.data : ORIGIN_INCOMPLETE;
2026: s = y ? y->u.data : ORIGIN_INCOMPLETE;
2027: if (p != s)
2028: return 0;
2029:
2030: /* RFC 4271 9.1.2.2. c) Compare MED's */
2031: if (pri_bgp->cf->med_metric || sec_bgp->cf->med_metric ||
2032: (bgp_get_neighbor(pri) == bgp_get_neighbor(sec)))
2033: {
2034: x = ea_find(pri->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2035: y = ea_find(sec->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_MULTI_EXIT_DISC));
2036: p = x ? x->u.data : pri_bgp->cf->default_med;
2037: s = y ? y->u.data : sec_bgp->cf->default_med;
2038: if (p != s)
2039: return 0;
2040: }
2041:
2042: /* RFC 4271 9.1.2.2. d) Prefer external peers */
2043: if (pri_bgp->is_interior != sec_bgp->is_interior)
2044: return 0;
2045:
2046: /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
2047: p = pri_bgp->cf->igp_metric ? pri->attrs->igp_metric : 0;
2048: s = sec_bgp->cf->igp_metric ? sec->attrs->igp_metric : 0;
2049: if (p != s)
2050: return 0;
2051:
2052: /* Remaining criteria are ignored */
2053:
2054: return 1;
2055: }
2056:
2057:
2058: static inline int
2059: same_group(rte *r, u32 lpref, u32 lasn)
2060: {
2061: return (r->pref == lpref) && (bgp_get_neighbor(r) == lasn);
2062: }
2063:
2064: static inline int
2065: use_deterministic_med(rte *r)
2066: {
2067: struct proto *P = r->attrs->src->proto;
2068: return (P->proto == &proto_bgp) && ((struct bgp_proto *) P)->cf->deterministic_med;
2069: }
2070:
2071: int
2072: bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
2073: {
2074: rte *r, *s;
2075: rte *key = new ? new : old;
2076: u32 lpref = key->pref;
2077: u32 lasn = bgp_get_neighbor(key);
2078: int old_suppressed = old ? old->u.bgp.suppressed : 0;
2079:
2080: /*
2081: * Proper RFC 4271 path selection is a bit complicated, it cannot be
2082: * implemented just by rte_better(), because it is not a linear
2083: * ordering. But it can be splitted to two levels, where the lower
2084: * level chooses the best routes in each group of routes from the
2085: * same neighboring AS and higher level chooses the best route (with
2086: * a slightly different ordering) between the best-in-group routes.
2087: *
2088: * When deterministic_med is disabled, we just ignore this issue and
2089: * choose the best route by bgp_rte_better() alone. If enabled, the
2090: * lower level of the route selection is done here (for the group
2091: * to which the changed route belongs), all routes in group are
2092: * marked as suppressed, just chosen best-in-group is not.
2093: *
2094: * Global best route selection then implements higher level by
2095: * choosing between non-suppressed routes (as they are always
2096: * preferred over suppressed routes). Routes from BGP protocols
2097: * that do not set deterministic_med are just never suppressed. As
2098: * they do not participate in the lower level selection, it is OK
2099: * that this fn is not called for them.
2100: *
2101: * The idea is simple, the implementation is more problematic,
2102: * mostly because of optimizations in rte_recalculate() that
2103: * avoids full recalculation in most cases.
2104: *
2105: * We can assume that at least one of new, old is non-NULL and both
2106: * are from the same protocol with enabled deterministic_med. We
2107: * group routes by both neighbor AS (lasn) and preference (lpref),
2108: * because bgp_rte_better() does not handle preference itself.
2109: */
2110:
2111: /* If new and old are from different groups, we just process that
2112: as two independent events */
2113: if (new && old && !same_group(old, lpref, lasn))
2114: {
2115: int i1, i2;
2116: i1 = bgp_rte_recalculate(table, net, NULL, old, old_best);
2117: i2 = bgp_rte_recalculate(table, net, new, NULL, old_best);
2118: return i1 || i2;
2119: }
2120:
2121: /*
2122: * We could find the best-in-group and then make some shortcuts like
2123: * in rte_recalculate, but as we would have to walk through all
2124: * net->routes just to find it, it is probably not worth. So we
2125: * just have one simple fast case that use just the old route.
2126: * We also set suppressed flag to avoid using it in bgp_rte_better().
2127: */
2128:
2129: if (new)
2130: new->u.bgp.suppressed = 1;
2131:
2132: if (old)
2133: {
2134: old->u.bgp.suppressed = 1;
2135:
2136: /* The fast case - replace not best with worse (or remove not best) */
2137: if (old_suppressed && !(new && bgp_rte_better(new, old)))
2138: return 0;
2139: }
2140:
2141: /* The default case - find a new best-in-group route */
2142: r = new; /* new may not be in the list */
2143: for (s=net->routes; rte_is_valid(s); s=s->next)
2144: if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2145: {
2146: s->u.bgp.suppressed = 1;
2147: if (!r || bgp_rte_better(s, r))
2148: r = s;
2149: }
2150:
2151: /* Simple case - the last route in group disappears */
2152: if (!r)
2153: return 0;
2154:
2155: /* Found if new is mergable with best-in-group */
2156: if (new && (new != r) && bgp_rte_mergable(r, new))
2157: new->u.bgp.suppressed = 0;
2158:
2159: /* Found all existing routes mergable with best-in-group */
2160: for (s=net->routes; rte_is_valid(s); s=s->next)
2161: if (use_deterministic_med(s) && same_group(s, lpref, lasn))
2162: if ((s != r) && bgp_rte_mergable(r, s))
2163: s->u.bgp.suppressed = 0;
2164:
2165: /* Found best-in-group */
2166: r->u.bgp.suppressed = 0;
2167:
2168: /*
2169: * There are generally two reasons why we have to force
2170: * recalculation (return 1): First, the new route may be wrongfully
2171: * chosen to be the best in the first case check in
2172: * rte_recalculate(), this may happen only if old_best is from the
2173: * same group. Second, another (different than new route)
2174: * best-in-group is chosen and that may be the proper best (although
2175: * rte_recalculate() without ignore that possibility).
2176: *
2177: * There are three possible cases according to whether the old route
2178: * was the best in group (OBG, i.e. !old_suppressed) and whether the
2179: * new route is the best in group (NBG, tested by r == new). These
2180: * cases work even if old or new is NULL.
2181: *
2182: * NBG -> new is a possible candidate for the best route, so we just
2183: * check for the first reason using same_group().
2184: *
2185: * !NBG && OBG -> Second reason applies, return 1
2186: *
2187: * !NBG && !OBG -> Best in group does not change, old != old_best,
2188: * rte_better(new, old_best) is false and therefore
2189: * the first reason does not apply, return 0
2190: */
2191:
2192: if (r == new)
2193: return old_best && same_group(old_best, lpref, lasn);
2194: else
2195: return !old_suppressed;
2196: }
2197:
2198: struct rte *
2199: bgp_rte_modify_stale(struct rte *r, struct linpool *pool)
2200: {
2201: eattr *a = ea_find(r->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_COMMUNITY));
2202: const struct adata *ad = a ? a->u.ptr : NULL;
2203: uint flags = a ? a->flags : BAF_PARTIAL;
2204:
2205: if (ad && int_set_contains(ad, BGP_COMM_NO_LLGR))
2206: return NULL;
2207:
2208: if (ad && int_set_contains(ad, BGP_COMM_LLGR_STALE))
2209: return r;
2210:
2211: r = rte_cow_rta(r, pool);
2212: bgp_set_attr_ptr(&(r->attrs->eattrs), pool, BA_COMMUNITY, flags,
2213: int_set_add(pool, ad, BGP_COMM_LLGR_STALE));
2214: r->u.bgp.stale = 1;
2215:
2216: return r;
2217: }
2218:
2219:
2220: /*
2221: * Reconstruct AS_PATH and AGGREGATOR according to RFC 6793 4.2.3
2222: */
2223: static void
2224: bgp_process_as4_attrs(ea_list **attrs, struct linpool *pool)
2225: {
2226: eattr *p2 = bgp_find_attr(*attrs, BA_AS_PATH);
2227: eattr *p4 = bgp_find_attr(*attrs, BA_AS4_PATH);
2228: eattr *a2 = bgp_find_attr(*attrs, BA_AGGREGATOR);
2229: eattr *a4 = bgp_find_attr(*attrs, BA_AS4_AGGREGATOR);
2230:
2231: /* First, unset AS4_* attributes */
2232: if (p4) bgp_unset_attr(attrs, pool, BA_AS4_PATH);
2233: if (a4) bgp_unset_attr(attrs, pool, BA_AS4_AGGREGATOR);
2234:
2235: /* Handle AGGREGATOR attribute */
2236: if (a2 && a4)
2237: {
2238: u32 a2_asn = get_u32(a2->u.ptr->data);
2239:
2240: /* If routes were aggregated by an old router, then AS4_PATH and
2241: AS4_AGGREGATOR are invalid. In that case we give up. */
2242: if (a2_asn != AS_TRANS)
2243: return;
2244:
2245: /* Use AS4_AGGREGATOR instead of AGGREGATOR */
2246: a2->u.ptr = a4->u.ptr;
2247: }
2248:
2249: /* Handle AS_PATH attribute */
2250: if (p2 && p4)
2251: {
2252: /* Both as_path_getlen() and as_path_cut() take AS_CONFED* as zero length */
2253: int p2_len = as_path_getlen(p2->u.ptr);
2254: int p4_len = as_path_getlen(p4->u.ptr);
2255:
2256: /* AS_PATH is too short, give up */
2257: if (p2_len < p4_len)
2258: return;
2259:
2260: /* Merge AS_PATH and AS4_PATH */
2261: struct adata *apc = as_path_cut(pool, p2->u.ptr, p2_len - p4_len);
2262: p2->u.ptr = as_path_merge(pool, apc, p4->u.ptr);
2263: }
2264: }
2265:
2266: int
2267: bgp_get_attr(eattr *a, byte *buf, int buflen)
2268: {
2269: uint i = EA_ID(a->id);
2270: const struct bgp_attr_desc *d;
2271: int len;
2272:
2273: if (bgp_attr_known(i))
2274: {
2275: d = &bgp_attr_table[i];
2276: len = bsprintf(buf, "%s", d->name);
2277: buf += len;
2278: if (d->format)
2279: {
2280: *buf++ = ':';
2281: *buf++ = ' ';
2282: d->format(a, buf, buflen - len - 2);
2283: return GA_FULL;
2284: }
2285: return GA_NAME;
2286: }
2287:
2288: bsprintf(buf, "%02x%s", i, (a->flags & BAF_TRANSITIVE) ? " [t]" : "");
2289: return GA_NAME;
2290: }
2291:
2292: void
2293: bgp_get_route_info(rte *e, byte *buf)
2294: {
2295: eattr *p = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_AS_PATH));
2296: eattr *o = ea_find(e->attrs->eattrs, EA_CODE(PROTOCOL_BGP, BA_ORIGIN));
2297: u32 origas;
2298:
2299: buf += bsprintf(buf, " (%d", e->pref);
2300:
2301: if (e->u.bgp.suppressed)
2302: buf += bsprintf(buf, "-");
2303:
2304: if (rte_stale(e))
2305: buf += bsprintf(buf, "s");
2306:
2307: u64 metric = bgp_total_aigp_metric(e);
2308: if (metric < BGP_AIGP_MAX)
2309: {
2310: buf += bsprintf(buf, "/%lu", metric);
2311: }
2312: else if (e->attrs->igp_metric)
2313: {
2314: if (!rte_resolvable(e))
2315: buf += bsprintf(buf, "/-");
2316: else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
2317: buf += bsprintf(buf, "/?");
2318: else
2319: buf += bsprintf(buf, "/%d", e->attrs->igp_metric);
2320: }
2321: buf += bsprintf(buf, ") [");
2322:
2323: if (p && as_path_get_last(p->u.ptr, &origas))
2324: buf += bsprintf(buf, "AS%u", origas);
2325: if (o)
2326: buf += bsprintf(buf, "%c", "ie?"[o->u.data]);
2327: strcpy(buf, "]");
2328: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>