Annotation of embedaddon/bird2/proto/bgp/packets.c, revision 1.1.1.1
1.1 misho 1: /*
2: * BIRD -- BGP Packet Processing
3: *
4: * (c) 2000 Martin Mares <mj@ucw.cz>
5: * (c) 2008--2016 Ondrej Zajicek <santiago@crfreenet.org>
6: * (c) 2008--2016 CZ.NIC z.s.p.o.
7: *
8: * Can be freely distributed and used under the terms of the GNU GPL.
9: */
10:
11: #undef LOCAL_DEBUG
12:
13: #include <stdlib.h>
14:
15: #include "nest/bird.h"
16: #include "nest/iface.h"
17: #include "nest/protocol.h"
18: #include "nest/route.h"
19: #include "nest/attrs.h"
20: #include "proto/mrt/mrt.h"
21: #include "conf/conf.h"
22: #include "lib/unaligned.h"
23: #include "lib/flowspec.h"
24: #include "lib/socket.h"
25:
26: #include "nest/cli.h"
27:
28: #include "bgp.h"
29:
30:
31: #define BGP_RR_REQUEST 0
32: #define BGP_RR_BEGIN 1
33: #define BGP_RR_END 2
34:
35: #define BGP_NLRI_MAX (4 + 1 + 32)
36:
37: #define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */
38: #define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */
39: #define BGP_MPLS_NULL 3 /* Implicit NULL label */
40: #define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */
41:
42:
43: static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
44: static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
45:
/*
 * Table for state -> RFC 6608 FSM error subcodes.
 * Indexed by connection state; states without an explicit initializer get
 * subcode 0 (remaining array elements are zero-initialized).
 */
static byte fsm_err_subcode[BS_MAX] = {
  [BS_OPENSENT] = 1,
  [BS_OPENCONFIRM] = 2,
  [BS_ESTABLISHED] = 3
};
52:
53:
54: static struct bgp_channel *
55: bgp_get_channel(struct bgp_proto *p, u32 afi)
56: {
57: uint i;
58:
59: for (i = 0; i < p->channel_count; i++)
60: if (p->afi_map[i] == afi)
61: return p->channel_map[i];
62:
63: return NULL;
64: }
65:
66: static inline void
67: put_af3(byte *buf, u32 id)
68: {
69: put_u16(buf, id >> 16);
70: buf[2] = id & 0xff;
71: }
72:
73: static inline void
74: put_af4(byte *buf, u32 id)
75: {
76: put_u16(buf, id >> 16);
77: buf[2] = 0;
78: buf[3] = id & 0xff;
79: }
80:
81: static inline u32
82: get_af3(byte *buf)
83: {
84: return (get_u16(buf) << 16) | buf[2];
85: }
86:
87: static inline u32
88: get_af4(byte *buf)
89: {
90: return (get_u16(buf) << 16) | buf[3];
91: }
92:
93: static void
94: init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
95: {
96: struct bgp_proto *p = conn->bgp;
97: int p_ok = conn->state >= BS_OPENCONFIRM;
98:
99: memset(d, 0, sizeof(struct mrt_bgp_data));
100: d->peer_as = p->remote_as;
101: d->local_as = p->local_as;
102: d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
103: d->af = ipa_is_ip4(p->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
104: d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
105: d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
106: d->as4 = p_ok ? p->as4_session : 0;
107: }
108:
109: static uint bgp_find_update_afi(byte *pos, uint len);
110:
111: static int
112: bgp_estimate_add_path(struct bgp_proto *p, byte *pkt, uint len)
113: {
114: /* No need to estimate it for other messages than UPDATE */
115: if (pkt[18] != PKT_UPDATE)
116: return 0;
117:
118: /* 1 -> no channel, 2 -> all channels, 3 -> some channels */
119: if (p->summary_add_path_rx < 3)
120: return p->summary_add_path_rx == 2;
121:
122: uint afi = bgp_find_update_afi(pkt, len);
123: struct bgp_channel *c = bgp_get_channel(p, afi);
124: if (!c)
125: {
126: /* Either frame error (if !afi) or unknown AFI/SAFI,
127: will be reported later in regular parsing */
128: BGP_TRACE(D_PACKETS, "MRT processing noticed invalid packet");
129: return 0;
130: }
131:
132: return c->add_path_rx;
133: }
134:
135: static void
136: bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len)
137: {
138: struct mrt_bgp_data d;
139: init_mrt_bgp_data(conn, &d);
140:
141: d.message = pkt;
142: d.msg_len = len;
143: d.add_path = bgp_estimate_add_path(conn->bgp, pkt, len);
144:
145: mrt_dump_bgp_message(&d);
146: }
147:
148: void
149: bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new)
150: {
151: struct mrt_bgp_data d;
152: init_mrt_bgp_data(conn, &d);
153:
154: d.old_state = old;
155: d.new_state = new;
156:
157: mrt_dump_bgp_state_change(&d);
158: }
159:
160: static byte *
161: bgp_create_notification(struct bgp_conn *conn, byte *buf)
162: {
163: struct bgp_proto *p = conn->bgp;
164:
165: BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
166: buf[0] = conn->notify_code;
167: buf[1] = conn->notify_subcode;
168: memcpy(buf+2, conn->notify_data, conn->notify_size);
169: return buf + 2 + conn->notify_size;
170: }
171:
172:
173: /* Capability negotiation as per RFC 5492 */
174:
175: const struct bgp_af_caps *
176: bgp_find_af_caps(struct bgp_caps *caps, u32 afi)
177: {
178: struct bgp_af_caps *ac;
179:
180: WALK_AF_CAPS(caps, ac)
181: if (ac->afi == afi)
182: return ac;
183:
184: return NULL;
185: }
186:
187: static struct bgp_af_caps *
188: bgp_get_af_caps(struct bgp_caps **pcaps, u32 afi)
189: {
190: struct bgp_caps *caps = *pcaps;
191: struct bgp_af_caps *ac;
192:
193: WALK_AF_CAPS(caps, ac)
194: if (ac->afi == afi)
195: return ac;
196:
197: uint n = caps->af_count;
198: if (uint_is_pow2(n))
199: *pcaps = caps = mb_realloc(caps, sizeof(struct bgp_caps) +
200: (2 * n) * sizeof(struct bgp_af_caps));
201:
202: ac = &caps->af_data[caps->af_count++];
203: memset(ac, 0, sizeof(struct bgp_af_caps));
204: ac->afi = afi;
205:
206: return ac;
207: }
208:
209: static int
210: bgp_af_caps_cmp(const void *X, const void *Y)
211: {
212: const struct bgp_af_caps *x = X, *y = Y;
213: return (x->afi < y->afi) ? -1 : (x->afi > y->afi) ? 1 : 0;
214: }
215:
216:
217: void
218: bgp_prepare_capabilities(struct bgp_conn *conn)
219: {
220: struct bgp_proto *p = conn->bgp;
221: struct bgp_channel *c;
222: struct bgp_caps *caps;
223: struct bgp_af_caps *ac;
224:
225: if (!p->cf->capabilities)
226: {
227: /* Just prepare empty local_caps */
228: conn->local_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
229: return;
230: }
231:
232: /* Prepare bgp_caps structure */
233: int n = list_length(&p->p.channels);
234: caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + n * sizeof(struct bgp_af_caps));
235: conn->local_caps = caps;
236:
237: caps->as4_support = p->cf->enable_as4;
238: caps->ext_messages = p->cf->enable_extended_messages;
239: caps->route_refresh = p->cf->enable_refresh;
240: caps->enhanced_refresh = p->cf->enable_refresh;
241:
242: if (caps->as4_support)
243: caps->as4_number = p->public_as;
244:
245: if (p->cf->gr_mode)
246: {
247: caps->gr_aware = 1;
248: caps->gr_time = p->cf->gr_time;
249: caps->gr_flags = p->p.gr_recovery ? BGP_GRF_RESTART : 0;
250: }
251:
252: if (p->cf->llgr_mode)
253: caps->llgr_aware = 1;
254:
255: /* Allocate and fill per-AF fields */
256: WALK_LIST(c, p->p.channels)
257: {
258: ac = &caps->af_data[caps->af_count++];
259: ac->afi = c->afi;
260: ac->ready = 1;
261:
262: ac->ext_next_hop = bgp_channel_is_ipv4(c) && c->cf->ext_next_hop;
263: caps->any_ext_next_hop |= ac->ext_next_hop;
264:
265: ac->add_path = c->cf->add_path;
266: caps->any_add_path |= ac->add_path;
267:
268: if (c->cf->gr_able)
269: {
270: ac->gr_able = 1;
271:
272: if (p->p.gr_recovery)
273: ac->gr_af_flags |= BGP_GRF_FORWARDING;
274: }
275:
276: if (c->cf->llgr_able)
277: {
278: ac->llgr_able = 1;
279: ac->llgr_time = c->cf->llgr_time;
280:
281: if (p->p.gr_recovery)
282: ac->llgr_flags |= BGP_LLGRF_FORWARDING;
283: }
284: }
285:
286: /* Sort capability fields by AFI/SAFI */
287: qsort(caps->af_data, caps->af_count, sizeof(struct bgp_af_caps), bgp_af_caps_cmp);
288: }
289:
290: static byte *
291: bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
292: {
293: struct bgp_proto *p = conn->bgp;
294: struct bgp_caps *caps = conn->local_caps;
295: struct bgp_af_caps *ac;
296: byte *buf_head = buf;
297: byte *data;
298:
299: /* Create capability list in buffer */
300:
301: /*
302: * Note that max length is ~ 22+21*af_count. With max 12 channels that is
303: * 274. We are limited just by buffer size (4096, minus header), as we support
304: * extended optional parameres. Therefore, we have enough space for expansion.
305: */
306:
307: WALK_AF_CAPS(caps, ac)
308: if (ac->ready)
309: {
310: *buf++ = 1; /* Capability 1: Multiprotocol extensions */
311: *buf++ = 4; /* Capability data length */
312: put_af4(buf, ac->afi);
313: buf += 4;
314: }
315:
316: if (caps->route_refresh)
317: {
318: *buf++ = 2; /* Capability 2: Support for route refresh */
319: *buf++ = 0; /* Capability data length */
320: }
321:
322: if (caps->any_ext_next_hop)
323: {
324: *buf++ = 5; /* Capability 5: Support for extended next hop */
325: *buf++ = 0; /* Capability data length, will be fixed later */
326: data = buf;
327:
328: WALK_AF_CAPS(caps, ac)
329: if (ac->ext_next_hop)
330: {
331: put_af4(buf, ac->afi);
332: put_u16(buf+4, BGP_AFI_IPV6);
333: buf += 6;
334: }
335:
336: data[-1] = buf - data;
337: }
338:
339: if (caps->ext_messages)
340: {
341: *buf++ = 6; /* Capability 6: Support for extended messages */
342: *buf++ = 0; /* Capability data length */
343: }
344:
345: if (caps->gr_aware)
346: {
347: *buf++ = 64; /* Capability 64: Support for graceful restart */
348: *buf++ = 0; /* Capability data length, will be fixed later */
349: data = buf;
350:
351: put_u16(buf, caps->gr_time);
352: buf[0] |= caps->gr_flags;
353: buf += 2;
354:
355: WALK_AF_CAPS(caps, ac)
356: if (ac->gr_able)
357: {
358: put_af3(buf, ac->afi);
359: buf[3] = ac->gr_af_flags;
360: buf += 4;
361: }
362:
363: data[-1] = buf - data;
364: }
365:
366: if (caps->as4_support)
367: {
368: *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
369: *buf++ = 4; /* Capability data length */
370: put_u32(buf, p->public_as);
371: buf += 4;
372: }
373:
374: if (caps->any_add_path)
375: {
376: *buf++ = 69; /* Capability 69: Support for ADD-PATH */
377: *buf++ = 0; /* Capability data length, will be fixed later */
378: data = buf;
379:
380: WALK_AF_CAPS(caps, ac)
381: if (ac->add_path)
382: {
383: put_af3(buf, ac->afi);
384: buf[3] = ac->add_path;
385: buf += 4;
386: }
387:
388: data[-1] = buf - data;
389: }
390:
391: if (caps->enhanced_refresh)
392: {
393: *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
394: *buf++ = 0; /* Capability data length */
395: }
396:
397: if (caps->llgr_aware)
398: {
399: *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */
400: *buf++ = 0; /* Capability data length, will be fixed later */
401: data = buf;
402:
403: WALK_AF_CAPS(caps, ac)
404: if (ac->llgr_able)
405: {
406: put_af3(buf, ac->afi);
407: buf[3] = ac->llgr_flags;
408: put_u24(buf+4, ac->llgr_time);
409: buf += 7;
410: }
411:
412: data[-1] = buf - data;
413: }
414:
415: caps->length = buf - buf_head;
416:
417: return buf;
418: }
419:
420: static int
421: bgp_read_capabilities(struct bgp_conn *conn, byte *pos, int len)
422: {
423: struct bgp_proto *p = conn->bgp;
424: struct bgp_caps *caps;
425: struct bgp_af_caps *ac;
426: int i, cl;
427: u32 af;
428:
429: if (!conn->remote_caps)
430: caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps) + sizeof(struct bgp_af_caps));
431: else
432: {
433: caps = conn->remote_caps;
434: conn->remote_caps = NULL;
435: }
436:
437: caps->length += len;
438:
439: while (len > 0)
440: {
441: if (len < 2 || len < (2 + pos[1]))
442: goto err;
443:
444: /* Capability length */
445: cl = pos[1];
446:
447: /* Capability type */
448: switch (pos[0])
449: {
450: case 1: /* Multiprotocol capability, RFC 4760 */
451: if (cl != 4)
452: goto err;
453:
454: af = get_af4(pos+2);
455: ac = bgp_get_af_caps(&caps, af);
456: ac->ready = 1;
457: break;
458:
459: case 2: /* Route refresh capability, RFC 2918 */
460: if (cl != 0)
461: goto err;
462:
463: caps->route_refresh = 1;
464: break;
465:
466: case 5: /* Extended next hop encoding capability, RFC 5549 */
467: if (cl % 6)
468: goto err;
469:
470: for (i = 0; i < cl; i += 6)
471: {
472: /* Specified only for IPv4 prefixes with IPv6 next hops */
473: if ((get_u16(pos+2+i+0) != BGP_AFI_IPV4) ||
474: (get_u16(pos+2+i+4) != BGP_AFI_IPV6))
475: continue;
476:
477: af = get_af4(pos+2+i);
478: ac = bgp_get_af_caps(&caps, af);
479: ac->ext_next_hop = 1;
480: }
481: break;
482:
483: case 6: /* Extended message length capability, RFC draft */
484: if (cl != 0)
485: goto err;
486:
487: caps->ext_messages = 1;
488: break;
489:
490: case 64: /* Graceful restart capability, RFC 4724 */
491: if (cl % 4 != 2)
492: goto err;
493:
494: /* Only the last instance is valid */
495: WALK_AF_CAPS(caps, ac)
496: {
497: ac->gr_able = 0;
498: ac->gr_af_flags = 0;
499: }
500:
501: caps->gr_aware = 1;
502: caps->gr_flags = pos[2] & 0xf0;
503: caps->gr_time = get_u16(pos + 2) & 0x0fff;
504:
505: for (i = 2; i < cl; i += 4)
506: {
507: af = get_af3(pos+2+i);
508: ac = bgp_get_af_caps(&caps, af);
509: ac->gr_able = 1;
510: ac->gr_af_flags = pos[2+i+3];
511: }
512: break;
513:
514: case 65: /* AS4 capability, RFC 6793 */
515: if (cl != 4)
516: goto err;
517:
518: caps->as4_support = 1;
519: caps->as4_number = get_u32(pos + 2);
520: break;
521:
522: case 69: /* ADD-PATH capability, RFC 7911 */
523: if (cl % 4)
524: goto err;
525:
526: for (i = 0; i < cl; i += 4)
527: {
528: byte val = pos[2+i+3];
529: if (!val || (val > BGP_ADD_PATH_FULL))
530: {
531: log(L_WARN "%s: Got ADD-PATH capability with unknown value %u, ignoring",
532: p->p.name, val);
533: break;
534: }
535: }
536:
537: for (i = 0; i < cl; i += 4)
538: {
539: af = get_af3(pos+2+i);
540: ac = bgp_get_af_caps(&caps, af);
541: ac->add_path = pos[2+i+3];
542: }
543: break;
544:
545: case 70: /* Enhanced route refresh capability, RFC 7313 */
546: if (cl != 0)
547: goto err;
548:
549: caps->enhanced_refresh = 1;
550: break;
551:
552: case 71: /* Long lived graceful restart capability, RFC draft */
553: if (cl % 7)
554: goto err;
555:
556: /* Presumably, only the last instance is valid */
557: WALK_AF_CAPS(caps, ac)
558: {
559: ac->llgr_able = 0;
560: ac->llgr_flags = 0;
561: ac->llgr_time = 0;
562: }
563:
564: caps->llgr_aware = 1;
565:
566: for (i = 0; i < cl; i += 7)
567: {
568: af = get_af3(pos+2+i);
569: ac = bgp_get_af_caps(&caps, af);
570: ac->llgr_able = 1;
571: ac->llgr_flags = pos[2+i+3];
572: ac->llgr_time = get_u24(pos + 2+i+4);
573: }
574: break;
575:
576: /* We can safely ignore all other capabilities */
577: }
578:
579: ADVANCE(pos, len, 2 + cl);
580: }
581:
582: /* The LLGR capability must be advertised together with the GR capability,
583: otherwise it must be disregarded */
584: if (!caps->gr_aware && caps->llgr_aware)
585: {
586: caps->llgr_aware = 0;
587: WALK_AF_CAPS(caps, ac)
588: {
589: ac->llgr_able = 0;
590: ac->llgr_flags = 0;
591: ac->llgr_time = 0;
592: }
593: }
594:
595: conn->remote_caps = caps;
596: return 0;
597:
598: err:
599: mb_free(caps);
600: bgp_error(conn, 2, 0, NULL, 0);
601: return -1;
602: }
603:
604: static int
605: bgp_check_capabilities(struct bgp_conn *conn)
606: {
607: struct bgp_proto *p = conn->bgp;
608: struct bgp_caps *local = conn->local_caps;
609: struct bgp_caps *remote = conn->remote_caps;
610: struct bgp_channel *c;
611: int count = 0;
612:
613: /* This is partially overlapping with bgp_conn_enter_established_state(),
614: but we need to run this just after we receive OPEN message */
615:
616: WALK_LIST(c, p->p.channels)
617: {
618: const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
619: const struct bgp_af_caps *rem = bgp_find_af_caps(remote, c->afi);
620:
621: /* Find out whether this channel will be active */
622: int active = loc && loc->ready &&
623: ((rem && rem->ready) || (!remote->length && (c->afi == BGP_AF_IPV4)));
624:
625: /* Mandatory must be active */
626: if (c->cf->mandatory && !active)
627: return 0;
628:
629: if (active)
630: count++;
631: }
632:
633: /* We need at least one channel active */
634: if (!count)
635: return 0;
636:
637: return 1;
638: }
639:
640: static int
641: bgp_read_options(struct bgp_conn *conn, byte *pos, uint len, uint rest)
642: {
643: struct bgp_proto *p = conn->bgp;
644: int ext = 0;
645:
646: /* Handle extended length (draft-ietf-idr-ext-opt-param-07) */
647: if ((len > 0) && (rest > 0) && (pos[0] == 255))
648: {
649: if (rest < 3)
650: goto err;
651:
652: /* Update pos/len to describe optional data */
653: len = get_u16(pos+1);
654: ext = 1;
655: pos += 3;
656: rest -= 3;
657: }
658:
659: /* Verify that optional data fits into OPEN packet */
660: if (len > rest)
661: goto err;
662:
663: /* Length of option parameter header */
664: uint hlen = ext ? 3 : 2;
665:
666: while (len > 0)
667: {
668: if (len < hlen)
669: goto err;
670:
671: uint otype = get_u8(pos);
672: uint olen = ext ? get_u16(pos+1) : get_u8(pos+1);
673:
674: if (len < (hlen + olen))
675: goto err;
676:
677: if (otype == 2)
678: {
679: /* BGP capabilities, RFC 5492 */
680: if (p->cf->capabilities)
681: if (bgp_read_capabilities(conn, pos + hlen, olen) < 0)
682: return -1;
683: }
684: else
685: {
686: /* Unknown option */
687: bgp_error(conn, 2, 4, pos, hlen + olen);
688: return -1;
689: }
690:
691: ADVANCE(pos, len, hlen + olen);
692: }
693:
694: /* Prepare empty caps if no capability option was announced */
695: if (!conn->remote_caps)
696: conn->remote_caps = mb_allocz(p->p.pool, sizeof(struct bgp_caps));
697:
698: return 0;
699:
700: err:
701: bgp_error(conn, 2, 0, NULL, 0);
702: return -1;
703: }
704:
705: static byte *
706: bgp_create_open(struct bgp_conn *conn, byte *buf)
707: {
708: struct bgp_proto *p = conn->bgp;
709:
710: BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
711: BGP_VERSION, p->public_as, p->cf->hold_time, p->local_id);
712:
713: buf[0] = BGP_VERSION;
714: put_u16(buf+1, (p->public_as < 0xFFFF) ? p->public_as : AS_TRANS);
715: put_u16(buf+3, p->cf->hold_time);
716: put_u32(buf+5, p->local_id);
717:
718: if (p->cf->capabilities)
719: {
720: /* Prepare local_caps and write capabilities to buffer */
721: byte *pos = buf+12;
722: byte *end = bgp_write_capabilities(conn, pos);
723: uint len = end - pos;
724:
725: if (len < 254)
726: {
727: buf[9] = len + 2; /* Optional parameters length */
728: buf[10] = 2; /* Option 2: Capability list */
729: buf[11] = len; /* Option data length */
730: }
731: else /* draft-ietf-idr-ext-opt-param-07 */
732: {
733: /* Move capabilities 4 B forward */
734: memmove(buf + 16, pos, len);
735: pos = buf + 16;
736: end = pos + len;
737:
738: buf[9] = 255; /* Non-ext OP length, fake */
739: buf[10] = 255; /* Non-ext OP type, signals extended length */
740: put_u16(buf+11, len + 3); /* Extended optional parameters length */
741: buf[13] = 2; /* Option 2: Capability list */
742: put_u16(buf+14, len); /* Option extended data length */
743: }
744:
745: return end;
746: }
747: else
748: {
749: buf[9] = 0; /* No optional parameters */
750: return buf + 10;
751: }
752:
753: return buf;
754: }
755:
756: static void
757: bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
758: {
759: struct bgp_proto *p = conn->bgp;
760: struct bgp_conn *other;
761: u32 asn, hold, id;
762:
763: /* Check state */
764: if (conn->state != BS_OPENSENT)
765: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
766:
767: /* Check message length */
768: if (len < 29)
769: { bgp_error(conn, 1, 2, pkt+16, 2); return; }
770:
771: if (pkt[19] != BGP_VERSION)
772: { u16 val = BGP_VERSION; bgp_error(conn, 2, 1, (byte *) &val, 2); return; }
773:
774: asn = get_u16(pkt+20);
775: hold = get_u16(pkt+22);
776: id = get_u32(pkt+24);
777: BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%R)", asn, hold, id);
778:
779: if (bgp_read_options(conn, pkt+29, pkt[28], len-29) < 0)
780: return;
781:
782: if (hold > 0 && hold < 3)
783: { bgp_error(conn, 2, 6, pkt+22, 2); return; }
784:
785: /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
786: if (!id || (p->is_internal && id == p->local_id))
787: { bgp_error(conn, 2, 3, pkt+24, -4); return; }
788:
789: /* RFC 5492 4 - check for required capabilities */
790: if (p->cf->capabilities && !bgp_check_capabilities(conn))
791: { bgp_error(conn, 2, 7, NULL, 0); return; }
792:
793: struct bgp_caps *caps = conn->remote_caps;
794:
795: if (caps->as4_support)
796: {
797: u32 as4 = caps->as4_number;
798:
799: if ((as4 != asn) && (asn != AS_TRANS))
800: log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
801:
802: /* When remote ASN is unspecified, it must be external one */
803: if (p->remote_as ? (as4 != p->remote_as) : (as4 == p->local_as))
804: { as4 = htonl(as4); bgp_error(conn, 2, 2, (byte *) &as4, 4); return; }
805:
806: conn->received_as = as4;
807: }
808: else
809: {
810: if (p->remote_as ? (asn != p->remote_as) : (asn == p->local_as))
811: { bgp_error(conn, 2, 2, pkt+20, 2); return; }
812:
813: conn->received_as = asn;
814: }
815:
816: /* Check the other connection */
817: other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
818: switch (other->state)
819: {
820: case BS_CONNECT:
821: case BS_ACTIVE:
822: /* Stop outgoing connection attempts */
823: bgp_conn_enter_idle_state(other);
824: break;
825:
826: case BS_IDLE:
827: case BS_OPENSENT:
828: case BS_CLOSE:
829: break;
830:
831: case BS_OPENCONFIRM:
832: /*
833: * Description of collision detection rules in RFC 4271 is confusing and
834: * contradictory, but it is essentially:
835: *
836: * 1. Router with higher ID is dominant
837: * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
838: * 3. When both connections are in OpenConfirm state, one initiated by
839: * the dominant router is kept.
840: *
841: * The first line in the expression below evaluates whether the neighbor
842: * is dominant, the second line whether the new connection was initiated
843: * by the neighbor. If both are true (or both are false), we keep the new
844: * connection, otherwise we keep the old one.
845: */
846: if (((p->local_id < id) || ((p->local_id == id) && (p->public_as < p->remote_as)))
847: == (conn == &p->incoming_conn))
848: {
849: /* Should close the other connection */
850: BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
851: bgp_error(other, 6, 7, NULL, 0);
852: break;
853: }
854: /* Fall thru */
855: case BS_ESTABLISHED:
856: /* Should close this connection */
857: BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
858: bgp_error(conn, 6, 7, NULL, 0);
859: return;
860:
861: default:
862: bug("bgp_rx_open: Unknown state");
863: }
864:
865: /* Update our local variables */
866: conn->hold_time = MIN(hold, p->cf->hold_time);
867: conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
868: conn->as4_session = conn->local_caps->as4_support && caps->as4_support;
869: conn->ext_messages = conn->local_caps->ext_messages && caps->ext_messages;
870: p->remote_id = id;
871:
872: DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n",
873: conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, conn->as4_session);
874:
875: bgp_schedule_packet(conn, NULL, PKT_KEEPALIVE);
876: bgp_start_timer(conn->hold_timer, conn->hold_time);
877: bgp_conn_enter_openconfirm_state(conn);
878: }
879:
880:
881: /*
882: * Next hop handling
883: */
884:
/*
 * Reporting helpers for next hop / NLRI processing. All of them expect a
 * variable named `s` in the scope of the caller, with s->proto->p.name
 * accessible. DISCARD() and WITHDRAW() contain a plain `return;`, so they
 * can be used only inside functions returning void.
 */

/* Log a message (L_REMOTE class) prefixed with the protocol name */
#define REPORT(msg, args...) \
  ({ log(L_REMOTE "%s: " msg, s->proto->p.name, ## args); })

/* Log a message and return from the calling function */
#define DISCARD(msg, args...) \
  ({ REPORT(msg, ## args); return; })

/* Log a message, set s->err_withdraw and return from the calling function */
#define WITHDRAW(msg, args...) \
  ({ REPORT(msg, ## args); s->err_withdraw = 1; return; })

/* Message templates for the macros above */
#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP "Missing NEXT_HOP attribute"
#define NO_LABEL_STACK "Missing MPLS stack"
898:
899:
900: static void
901: bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
902: {
903: struct bgp_proto *p = s->proto;
904: struct bgp_channel *c = s->channel;
905:
906: if (c->cf->gw_mode == GW_DIRECT)
907: {
908: neighbor *nbr = NULL;
909:
910: /* GW_DIRECT -> single_hop -> p->neigh != NULL */
911: if (ipa_nonzero(gw))
912: nbr = neigh_find(&p->p, gw, NULL, 0);
913: else if (ipa_nonzero(ll))
914: nbr = neigh_find(&p->p, ll, p->neigh->iface, 0);
915:
916: if (!nbr || (nbr->scope == SCOPE_HOST))
917: WITHDRAW(BAD_NEXT_HOP);
918:
919: a->dest = RTD_UNICAST;
920: a->nh.gw = nbr->addr;
921: a->nh.iface = nbr->iface;
922: a->igp_metric = c->cf->cost;
923: }
924: else /* GW_RECURSIVE */
925: {
926: if (ipa_zero(gw))
927: WITHDRAW(BAD_NEXT_HOP);
928:
929: rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
930: s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
931:
932: if (!s->mpls)
933: rta_apply_hostentry(a, s->hostentry, NULL);
934:
935: /* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
936: }
937: }
938:
939: static void
940: bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
941: {
942: if (lnum > MPLS_MAX_LABEL_STACK)
943: {
944: REPORT("Too many MPLS labels ($u)", lnum);
945:
946: a->dest = RTD_UNREACHABLE;
947: a->hostentry = NULL;
948: a->nh = (struct nexthop) { };
949: return;
950: }
951:
952: /* Handle implicit NULL as empty MPLS stack */
953: if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
954: lnum = 0;
955:
956: if (s->channel->cf->gw_mode == GW_DIRECT)
957: {
958: a->nh.labels = lnum;
959: memcpy(a->nh.label, labels, 4*lnum);
960: }
961: else /* GW_RECURSIVE */
962: {
963: mpls_label_stack ms;
964:
965: ms.len = lnum;
966: memcpy(ms.stack, labels, 4*lnum);
967: rta_apply_hostentry(a, s->hostentry, &ms);
968: }
969: }
970:
971:
972: static int
973: bgp_match_src(struct bgp_export_state *s, int mode)
974: {
975: switch (mode)
976: {
977: case NH_NO: return 0;
978: case NH_ALL: return 1;
979: case NH_IBGP: return s->src && s->src->is_internal;
980: case NH_EBGP: return s->src && !s->src->is_internal;
981: default: return 0;
982: }
983: }
984:
985: static inline int
986: bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
987: {
988: struct bgp_proto *p = s->proto;
989: struct bgp_channel *c = s->channel;
990: ip_addr *nh = (void *) a->u.ptr->data;
991:
992: /* Handle next hop self option */
993: if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
994: return 0;
995:
996: /* Handle next hop keep option */
997: if (c->cf->next_hop_keep && bgp_match_src(s, c->cf->next_hop_keep))
998: return 1;
999:
1000: /* Keep it when explicitly set in export filter */
1001: if (a->type & EAF_FRESH)
1002: return 1;
1003:
1004: /* Check for non-matching AF */
1005: if ((ipa_is_ip4(*nh) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
1006: return 0;
1007:
1008: /* Keep it when exported to internal peers */
1009: if (p->is_interior && ipa_nonzero(*nh))
1010: return 1;
1011:
1012: /* Keep it when forwarded between single-hop BGPs on the same iface */
1013: struct iface *ifa = (s->src && s->src->neigh) ? s->src->neigh->iface : NULL;
1014: return p->neigh && (p->neigh->iface == ifa);
1015: }
1016:
1017: static inline int
1018: bgp_use_gateway(struct bgp_export_state *s)
1019: {
1020: struct bgp_proto *p = s->proto;
1021: struct bgp_channel *c = s->channel;
1022: rta *ra = s->route->attrs;
1023:
1024: /* Handle next hop self option - also applies to gateway */
1025: if (c->cf->next_hop_self && bgp_match_src(s, c->cf->next_hop_self))
1026: return 0;
1027:
1028: /* We need one valid global gateway */
1029: if ((ra->dest != RTD_UNICAST) || ra->nh.next || ipa_zero(ra->nh.gw) || ipa_is_link_local(ra->nh.gw))
1030: return 0;
1031:
1032: /* Check for non-matching AF */
1033: if ((ipa_is_ip4(ra->nh.gw) != bgp_channel_is_ipv4(c)) && !c->ext_next_hop)
1034: return 0;
1035:
1036: /* Use it when exported to internal peers */
1037: if (p->is_interior)
1038: return 1;
1039:
1040: /* Use it when forwarded to single-hop BGP peer on on the same iface */
1041: return p->neigh && (p->neigh->iface == ra->nh.iface);
1042: }
1043:
1044: static void
1045: bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
1046: {
1047: if (!a || !bgp_use_next_hop(s, a))
1048: {
1049: if (bgp_use_gateway(s))
1050: {
1051: rta *ra = s->route->attrs;
1052: ip_addr nh[1] = { ra->nh.gw };
1053: bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
1054:
1055: if (s->mpls)
1056: {
1057: u32 implicit_null = BGP_MPLS_NULL;
1058: u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
1059: uint lnum = ra->nh.labels ? ra->nh.labels : 1;
1060: bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
1061: }
1062: }
1063: else
1064: {
1065: ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
1066: bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
1067: s->local_next_hop = 1;
1068:
1069: /* TODO: Use local MPLS assigned label */
1070: if (s->mpls)
1071: {
1072: u32 implicit_null = BGP_MPLS_NULL;
1073: bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, &implicit_null, 4);
1074: }
1075: }
1076: }
1077:
1078: /* Check if next hop is valid */
1079: a = bgp_find_attr(*to, BA_NEXT_HOP);
1080: if (!a)
1081: WITHDRAW(NO_NEXT_HOP);
1082:
1083: ip_addr *nh = (void *) a->u.ptr->data;
1084: ip_addr peer = s->proto->remote_ip;
1085: uint len = a->u.ptr->length;
1086:
1087: /* Forbid zero next hop */
1088: if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
1089: WITHDRAW(BAD_NEXT_HOP);
1090:
1091: /* Forbid next hop equal to neighbor IP */
1092: if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
1093: WITHDRAW(BAD_NEXT_HOP);
1094:
1095: /* Forbid next hop with non-matching AF */
1096: if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
1097: !s->channel->ext_next_hop)
1098: WITHDRAW(BAD_NEXT_HOP);
1099:
1100: /* Just check if MPLS stack */
1101: if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
1102: WITHDRAW(NO_LABEL_STACK);
1103: }
1104:
1105: static uint
1106: bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
1107: {
1108: /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */
1109: ip_addr *nh = (void *) a->u.ptr->data;
1110: uint len = a->u.ptr->length;
1111:
1112: ASSERT((len == 16) || (len == 32));
1113:
1114: /*
1115: * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
1116: * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
1117: * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
1118: * IPv6 address with IPv6 NLRI.
1119: */
1120:
1121: if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
1122: {
1123: put_ip4(buf, ipa_to_ip4(nh[0]));
1124: return 4;
1125: }
1126:
1127: put_ip6(buf, ipa_to_ip6(nh[0]));
1128:
1129: if (len == 32)
1130: put_ip6(buf+16, ipa_to_ip6(nh[1]));
1131:
1132: return len;
1133: }
1134:
1135: static void
1136: bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
1137: {
1138: struct bgp_channel *c = s->channel;
1139: struct adata *ad = lp_alloc_adata(s->pool, 32);
1140: ip_addr *nh = (void *) ad->data;
1141:
1142: if (len == 4)
1143: {
1144: nh[0] = ipa_from_ip4(get_ip4(data));
1145: nh[1] = IPA_NONE;
1146: }
1147: else if (len == 16)
1148: {
1149: nh[0] = ipa_from_ip6(get_ip6(data));
1150: nh[1] = IPA_NONE;
1151:
1152: if (ipa_is_link_local(nh[0]))
1153: { nh[1] = nh[0]; nh[0] = IPA_NONE; }
1154: }
1155: else if (len == 32)
1156: {
1157: nh[0] = ipa_from_ip6(get_ip6(data));
1158: nh[1] = ipa_from_ip6(get_ip6(data+16));
1159:
1160: if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
1161: nh[1] = IPA_NONE;
1162: }
1163: else
1164: bgp_parse_error(s, 9);
1165:
1166: if (ipa_zero(nh[1]))
1167: ad->length = 16;
1168:
1169: if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
1170: WITHDRAW(BAD_NEXT_HOP);
1171:
1172: // XXXX validate next hop
1173:
1174: bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
1175: bgp_apply_next_hop(s, a, nh[0], nh[1]);
1176: }
1177:
1178: static uint
1179: bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
1180: {
1181: ip_addr *nh = (void *) a->u.ptr->data;
1182: uint len = a->u.ptr->length;
1183:
1184: ASSERT((len == 16) || (len == 32));
1185:
1186: /*
1187: * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
1188: * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
1189: * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
1190: * IPv6 address with VPNv6 NLRI.
1191: */
1192:
1193: if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
1194: {
1195: put_u64(buf, 0); /* VPN RD is 0 */
1196: put_ip4(buf+8, ipa_to_ip4(nh[0]));
1197: return 12;
1198: }
1199:
1200: put_u64(buf, 0); /* VPN RD is 0 */
1201: put_ip6(buf+8, ipa_to_ip6(nh[0]));
1202:
1203: if (len == 16)
1204: return 24;
1205:
1206: put_u64(buf+24, 0); /* VPN RD is 0 */
1207: put_ip6(buf+32, ipa_to_ip6(nh[1]));
1208:
1209: return 48;
1210: }
1211:
1212: static void
1213: bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
1214: {
1215: struct bgp_channel *c = s->channel;
1216: struct adata *ad = lp_alloc_adata(s->pool, 32);
1217: ip_addr *nh = (void *) ad->data;
1218:
1219: if (len == 12)
1220: {
1221: nh[0] = ipa_from_ip4(get_ip4(data+8));
1222: nh[1] = IPA_NONE;
1223: }
1224: else if (len == 24)
1225: {
1226: nh[0] = ipa_from_ip6(get_ip6(data+8));
1227: nh[1] = IPA_NONE;
1228:
1229: if (ipa_is_link_local(nh[0]))
1230: { nh[1] = nh[0]; nh[0] = IPA_NONE; }
1231: }
1232: else if (len == 48)
1233: {
1234: nh[0] = ipa_from_ip6(get_ip6(data+8));
1235: nh[1] = ipa_from_ip6(get_ip6(data+32));
1236:
1237: if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
1238: nh[1] = IPA_NONE;
1239: }
1240: else
1241: bgp_parse_error(s, 9);
1242:
1243: if (ipa_zero(nh[1]))
1244: ad->length = 16;
1245:
1246: /* XXXX which error */
1247: if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
1248: bgp_parse_error(s, 9);
1249:
1250: if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
1251: WITHDRAW(BAD_NEXT_HOP);
1252:
1253: // XXXX validate next hop
1254:
1255: bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
1256: bgp_apply_next_hop(s, a, nh[0], nh[1]);
1257: }
1258:
1259:
1260:
1261: static uint
1262: bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
1263: {
1264: return 0;
1265: }
1266:
1267: static void
1268: bgp_decode_next_hop_none(struct bgp_parse_state *s UNUSED, byte *data UNUSED, uint len UNUSED, rta *a UNUSED)
1269: {
1270: /*
1271: * Although we expect no next hop and RFC 7606 7.11 states that attribute
1272: * MP_REACH_NLRI with unexpected next hop length is considered malformed,
1273: * FlowSpec RFC 5575 4 states that next hop shall be ignored on receipt.
1274: */
1275:
1276: return;
1277: }
1278:
1279: static void
1280: bgp_update_next_hop_none(struct bgp_export_state *s, eattr *a, ea_list **to)
1281: {
1282: /* NEXT_HOP shall not pass */
1283: if (a)
1284: bgp_unset_attr(to, s->pool, BA_NEXT_HOP);
1285: }
1286:
1287:
1288: /*
1289: * UPDATE
1290: */
1291:
1292: static void
1293: bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
1294: {
1295: if (path_id != s->last_id)
1296: {
1297: s->last_src = rt_get_source(&s->proto->p, path_id);
1298: s->last_id = path_id;
1299:
1300: rta_free(s->cached_rta);
1301: s->cached_rta = NULL;
1302: }
1303:
1304: if (!a0)
1305: {
1306: /* Route withdraw */
1307: rte_update3(&s->channel->c, n, NULL, s->last_src);
1308: return;
1309: }
1310:
1311: /* Prepare cached route attributes */
1312: if (s->cached_rta == NULL)
1313: {
1314: a0->src = s->last_src;
1315:
1316: /* Workaround for rta_lookup() breaking eattrs */
1317: ea_list *ea = a0->eattrs;
1318: s->cached_rta = rta_lookup(a0);
1319: a0->eattrs = ea;
1320: }
1321:
1322: rta *a = rta_clone(s->cached_rta);
1323: rte *e = rte_get_temp(a);
1324:
1325: e->pflags = 0;
1326: e->u.bgp.suppressed = 0;
1327: e->u.bgp.stale = -1;
1328: rte_update3(&s->channel->c, n, e, s->last_src);
1329: }
1330:
1331: static void
1332: bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, const adata *mpls, byte **pos, uint *size, byte *pxlen)
1333: {
1334: const u32 dummy = 0;
1335: const u32 *labels = mpls ? (const u32 *) mpls->data : &dummy;
1336: uint lnum = mpls ? (mpls->length / 4) : 1;
1337:
1338: for (uint i = 0; i < lnum; i++)
1339: {
1340: put_u24(*pos, labels[i] << 4);
1341: ADVANCE(*pos, *size, 3);
1342: }
1343:
1344: /* Add bottom-of-stack flag */
1345: (*pos)[-1] |= BGP_MPLS_BOS;
1346:
1347: *pxlen += 24 * lnum;
1348: }
1349:
1350: static void
1351: bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
1352: {
1353: u32 labels[BGP_MPLS_MAX], label;
1354: uint lnum = 0;
1355:
1356: do {
1357: if (*pxlen < 24)
1358: bgp_parse_error(s, 1);
1359:
1360: label = get_u24(*pos);
1361: labels[lnum++] = label >> 4;
1362: ADVANCE(*pos, *len, 3);
1363: *pxlen -= 24;
1364:
1365: /* RFC 8277 2.4 - withdraw does not have variable-size MPLS stack but
1366: fixed-size 24-bit Compatibility field, which MUST be ignored */
1367: if (!a && !s->err_withdraw)
1368: return;
1369: }
1370: while (!(label & BGP_MPLS_BOS));
1371:
1372: if (!a)
1373: return;
1374:
1375: /* Attach MPLS attribute unless we already have one */
1376: if (!s->mpls_labels)
1377: {
1378: s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
1379: bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
1380: }
1381:
1382: /* Overwrite data in the attribute */
1383: s->mpls_labels->length = 4*lnum;
1384: memcpy(s->mpls_labels->data, labels, 4*lnum);
1385:
1386: /* Update next hop entry in rta */
1387: bgp_apply_mpls_labels(s, a, labels, lnum);
1388:
1389: /* Attributes were changed, invalidate cached entry */
1390: rta_free(s->cached_rta);
1391: s->cached_rta = NULL;
1392:
1393: return;
1394: }
1395:
1396: static uint
1397: bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1398: {
1399: byte *pos = buf;
1400:
1401: while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
1402: {
1403: struct bgp_prefix *px = HEAD(buck->prefixes);
1404: struct net_addr_ip4 *net = (void *) px->net;
1405:
1406: /* Encode path ID */
1407: if (s->add_path)
1408: {
1409: put_u32(pos, px->path_id);
1410: ADVANCE(pos, size, 4);
1411: }
1412:
1413: /* Encode prefix length */
1414: *pos = net->pxlen;
1415: ADVANCE(pos, size, 1);
1416:
1417: /* Encode MPLS labels */
1418: if (s->mpls)
1419: bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1420:
1421: /* Encode prefix body */
1422: ip4_addr a = ip4_hton(net->prefix);
1423: uint b = (net->pxlen + 7) / 8;
1424: memcpy(pos, &a, b);
1425: ADVANCE(pos, size, b);
1426:
1427: bgp_free_prefix(s->channel, px);
1428: }
1429:
1430: return pos - buf;
1431: }
1432:
1433: static void
1434: bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1435: {
1436: while (len)
1437: {
1438: net_addr_ip4 net;
1439: u32 path_id = 0;
1440:
1441: /* Decode path ID */
1442: if (s->add_path)
1443: {
1444: if (len < 5)
1445: bgp_parse_error(s, 1);
1446:
1447: path_id = get_u32(pos);
1448: ADVANCE(pos, len, 4);
1449: }
1450:
1451: /* Decode prefix length */
1452: uint l = *pos;
1453: ADVANCE(pos, len, 1);
1454:
1455: if (len < ((l + 7) / 8))
1456: bgp_parse_error(s, 1);
1457:
1458: /* Decode MPLS labels */
1459: if (s->mpls)
1460: bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1461:
1462: if (l > IP4_MAX_PREFIX_LENGTH)
1463: bgp_parse_error(s, 10);
1464:
1465: /* Decode prefix body */
1466: ip4_addr addr = IP4_NONE;
1467: uint b = (l + 7) / 8;
1468: memcpy(&addr, pos, b);
1469: ADVANCE(pos, len, b);
1470:
1471: net = NET_ADDR_IP4(ip4_ntoh(addr), l);
1472: net_normalize_ip4(&net);
1473:
1474: // XXXX validate prefix
1475:
1476: bgp_rte_update(s, (net_addr *) &net, path_id, a);
1477: }
1478: }
1479:
1480:
1481: static uint
1482: bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1483: {
1484: byte *pos = buf;
1485:
1486: while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
1487: {
1488: struct bgp_prefix *px = HEAD(buck->prefixes);
1489: struct net_addr_ip6 *net = (void *) px->net;
1490:
1491: /* Encode path ID */
1492: if (s->add_path)
1493: {
1494: put_u32(pos, px->path_id);
1495: ADVANCE(pos, size, 4);
1496: }
1497:
1498: /* Encode prefix length */
1499: *pos = net->pxlen;
1500: ADVANCE(pos, size, 1);
1501:
1502: /* Encode MPLS labels */
1503: if (s->mpls)
1504: bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1505:
1506: /* Encode prefix body */
1507: ip6_addr a = ip6_hton(net->prefix);
1508: uint b = (net->pxlen + 7) / 8;
1509: memcpy(pos, &a, b);
1510: ADVANCE(pos, size, b);
1511:
1512: bgp_free_prefix(s->channel, px);
1513: }
1514:
1515: return pos - buf;
1516: }
1517:
1518: static void
1519: bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1520: {
1521: while (len)
1522: {
1523: net_addr_ip6 net;
1524: u32 path_id = 0;
1525:
1526: /* Decode path ID */
1527: if (s->add_path)
1528: {
1529: if (len < 5)
1530: bgp_parse_error(s, 1);
1531:
1532: path_id = get_u32(pos);
1533: ADVANCE(pos, len, 4);
1534: }
1535:
1536: /* Decode prefix length */
1537: uint l = *pos;
1538: ADVANCE(pos, len, 1);
1539:
1540: if (len < ((l + 7) / 8))
1541: bgp_parse_error(s, 1);
1542:
1543: /* Decode MPLS labels */
1544: if (s->mpls)
1545: bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1546:
1547: if (l > IP6_MAX_PREFIX_LENGTH)
1548: bgp_parse_error(s, 10);
1549:
1550: /* Decode prefix body */
1551: ip6_addr addr = IP6_NONE;
1552: uint b = (l + 7) / 8;
1553: memcpy(&addr, pos, b);
1554: ADVANCE(pos, len, b);
1555:
1556: net = NET_ADDR_IP6(ip6_ntoh(addr), l);
1557: net_normalize_ip6(&net);
1558:
1559: // XXXX validate prefix
1560:
1561: bgp_rte_update(s, (net_addr *) &net, path_id, a);
1562: }
1563: }
1564:
1565: static uint
1566: bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1567: {
1568: byte *pos = buf;
1569:
1570: while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
1571: {
1572: struct bgp_prefix *px = HEAD(buck->prefixes);
1573: struct net_addr_vpn4 *net = (void *) px->net;
1574:
1575: /* Encode path ID */
1576: if (s->add_path)
1577: {
1578: put_u32(pos, px->path_id);
1579: ADVANCE(pos, size, 4);
1580: }
1581:
1582: /* Encode prefix length */
1583: *pos = 64 + net->pxlen;
1584: ADVANCE(pos, size, 1);
1585:
1586: /* Encode MPLS labels */
1587: if (s->mpls)
1588: bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1589:
1590: /* Encode route distinguisher */
1591: put_u64(pos, net->rd);
1592: ADVANCE(pos, size, 8);
1593:
1594: /* Encode prefix body */
1595: ip4_addr a = ip4_hton(net->prefix);
1596: uint b = (net->pxlen + 7) / 8;
1597: memcpy(pos, &a, b);
1598: ADVANCE(pos, size, b);
1599:
1600: bgp_free_prefix(s->channel, px);
1601: }
1602:
1603: return pos - buf;
1604: }
1605:
1606: static void
1607: bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1608: {
1609: while (len)
1610: {
1611: net_addr_vpn4 net;
1612: u32 path_id = 0;
1613:
1614: /* Decode path ID */
1615: if (s->add_path)
1616: {
1617: if (len < 5)
1618: bgp_parse_error(s, 1);
1619:
1620: path_id = get_u32(pos);
1621: ADVANCE(pos, len, 4);
1622: }
1623:
1624: /* Decode prefix length */
1625: uint l = *pos;
1626: ADVANCE(pos, len, 1);
1627:
1628: if (len < ((l + 7) / 8))
1629: bgp_parse_error(s, 1);
1630:
1631: /* Decode MPLS labels */
1632: if (s->mpls)
1633: bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1634:
1635: /* Decode route distinguisher */
1636: if (l < 64)
1637: bgp_parse_error(s, 1);
1638:
1639: u64 rd = get_u64(pos);
1640: ADVANCE(pos, len, 8);
1641: l -= 64;
1642:
1643: if (l > IP4_MAX_PREFIX_LENGTH)
1644: bgp_parse_error(s, 10);
1645:
1646: /* Decode prefix body */
1647: ip4_addr addr = IP4_NONE;
1648: uint b = (l + 7) / 8;
1649: memcpy(&addr, pos, b);
1650: ADVANCE(pos, len, b);
1651:
1652: net = NET_ADDR_VPN4(ip4_ntoh(addr), l, rd);
1653: net_normalize_vpn4(&net);
1654:
1655: // XXXX validate prefix
1656:
1657: bgp_rte_update(s, (net_addr *) &net, path_id, a);
1658: }
1659: }
1660:
1661:
1662: static uint
1663: bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1664: {
1665: byte *pos = buf;
1666:
1667: while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
1668: {
1669: struct bgp_prefix *px = HEAD(buck->prefixes);
1670: struct net_addr_vpn6 *net = (void *) px->net;
1671:
1672: /* Encode path ID */
1673: if (s->add_path)
1674: {
1675: put_u32(pos, px->path_id);
1676: ADVANCE(pos, size, 4);
1677: }
1678:
1679: /* Encode prefix length */
1680: *pos = 64 + net->pxlen;
1681: ADVANCE(pos, size, 1);
1682:
1683: /* Encode MPLS labels */
1684: if (s->mpls)
1685: bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
1686:
1687: /* Encode route distinguisher */
1688: put_u64(pos, net->rd);
1689: ADVANCE(pos, size, 8);
1690:
1691: /* Encode prefix body */
1692: ip6_addr a = ip6_hton(net->prefix);
1693: uint b = (net->pxlen + 7) / 8;
1694: memcpy(pos, &a, b);
1695: ADVANCE(pos, size, b);
1696:
1697: bgp_free_prefix(s->channel, px);
1698: }
1699:
1700: return pos - buf;
1701: }
1702:
1703: static void
1704: bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1705: {
1706: while (len)
1707: {
1708: net_addr_vpn6 net;
1709: u32 path_id = 0;
1710:
1711: /* Decode path ID */
1712: if (s->add_path)
1713: {
1714: if (len < 5)
1715: bgp_parse_error(s, 1);
1716:
1717: path_id = get_u32(pos);
1718: ADVANCE(pos, len, 4);
1719: }
1720:
1721: /* Decode prefix length */
1722: uint l = *pos;
1723: ADVANCE(pos, len, 1);
1724:
1725: if (len < ((l + 7) / 8))
1726: bgp_parse_error(s, 1);
1727:
1728: /* Decode MPLS labels */
1729: if (s->mpls)
1730: bgp_decode_mpls_labels(s, &pos, &len, &l, a);
1731:
1732: /* Decode route distinguisher */
1733: if (l < 64)
1734: bgp_parse_error(s, 1);
1735:
1736: u64 rd = get_u64(pos);
1737: ADVANCE(pos, len, 8);
1738: l -= 64;
1739:
1740: if (l > IP6_MAX_PREFIX_LENGTH)
1741: bgp_parse_error(s, 10);
1742:
1743: /* Decode prefix body */
1744: ip6_addr addr = IP6_NONE;
1745: uint b = (l + 7) / 8;
1746: memcpy(&addr, pos, b);
1747: ADVANCE(pos, len, b);
1748:
1749: net = NET_ADDR_VPN6(ip6_ntoh(addr), l, rd);
1750: net_normalize_vpn6(&net);
1751:
1752: // XXXX validate prefix
1753:
1754: bgp_rte_update(s, (net_addr *) &net, path_id, a);
1755: }
1756: }
1757:
1758:
1759: static uint
1760: bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1761: {
1762: byte *pos = buf;
1763:
1764: while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
1765: {
1766: struct bgp_prefix *px = HEAD(buck->prefixes);
1767: struct net_addr_flow4 *net = (void *) px->net;
1768: uint flen = net->length - sizeof(net_addr_flow4);
1769:
1770: /* Encode path ID */
1771: if (s->add_path)
1772: {
1773: put_u32(pos, px->path_id);
1774: ADVANCE(pos, size, 4);
1775: }
1776:
1777: if (flen > size)
1778: break;
1779:
1780: /* Copy whole flow data including length */
1781: memcpy(pos, net->data, flen);
1782: ADVANCE(pos, size, flen);
1783:
1784: bgp_free_prefix(s->channel, px);
1785: }
1786:
1787: return pos - buf;
1788: }
1789:
1790: static void
1791: bgp_decode_nlri_flow4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1792: {
1793: while (len)
1794: {
1795: u32 path_id = 0;
1796:
1797: /* Decode path ID */
1798: if (s->add_path)
1799: {
1800: if (len < 4)
1801: bgp_parse_error(s, 1);
1802:
1803: path_id = get_u32(pos);
1804: ADVANCE(pos, len, 4);
1805: }
1806:
1807: if (len < 2)
1808: bgp_parse_error(s, 1);
1809:
1810: /* Decode flow length */
1811: uint hlen = flow_hdr_length(pos);
1812: uint dlen = flow_read_length(pos);
1813: uint flen = hlen + dlen;
1814: byte *data = pos + hlen;
1815:
1816: if (len < flen)
1817: bgp_parse_error(s, 1);
1818:
1819: /* Validate flow data */
1820: enum flow_validated_state r = flow4_validate(data, dlen);
1821: if (r != FLOW_ST_VALID)
1822: {
1823: log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
1824: bgp_parse_error(s, 1);
1825: }
1826:
1827: if (data[0] != FLOW_TYPE_DST_PREFIX)
1828: {
1829: log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
1830: bgp_parse_error(s, 1);
1831: }
1832:
1833: /* Decode dst prefix */
1834: ip4_addr px = IP4_NONE;
1835: uint pxlen = data[1];
1836:
1837: // FIXME: Use some generic function
1838: memcpy(&px, data+2, BYTES(pxlen));
1839: px = ip4_and(ip4_ntoh(px), ip4_mkmask(pxlen));
1840:
1841: /* Prepare the flow */
1842: net_addr *n = alloca(sizeof(struct net_addr_flow4) + flen);
1843: net_fill_flow4(n, px, pxlen, pos, flen);
1844: ADVANCE(pos, len, flen);
1845:
1846: bgp_rte_update(s, n, path_id, a);
1847: }
1848: }
1849:
1850:
1851: static uint
1852: bgp_encode_nlri_flow6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
1853: {
1854: byte *pos = buf;
1855:
1856: while (!EMPTY_LIST(buck->prefixes) && (size >= 4))
1857: {
1858: struct bgp_prefix *px = HEAD(buck->prefixes);
1859: struct net_addr_flow6 *net = (void *) px->net;
1860: uint flen = net->length - sizeof(net_addr_flow6);
1861:
1862: /* Encode path ID */
1863: if (s->add_path)
1864: {
1865: put_u32(pos, px->path_id);
1866: ADVANCE(pos, size, 4);
1867: }
1868:
1869: if (flen > size)
1870: break;
1871:
1872: /* Copy whole flow data including length */
1873: memcpy(pos, net->data, flen);
1874: ADVANCE(pos, size, flen);
1875:
1876: bgp_free_prefix(s->channel, px);
1877: }
1878:
1879: return pos - buf;
1880: }
1881:
1882: static void
1883: bgp_decode_nlri_flow6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
1884: {
1885: while (len)
1886: {
1887: u32 path_id = 0;
1888:
1889: /* Decode path ID */
1890: if (s->add_path)
1891: {
1892: if (len < 4)
1893: bgp_parse_error(s, 1);
1894:
1895: path_id = get_u32(pos);
1896: ADVANCE(pos, len, 4);
1897: }
1898:
1899: if (len < 2)
1900: bgp_parse_error(s, 1);
1901:
1902: /* Decode flow length */
1903: uint hlen = flow_hdr_length(pos);
1904: uint dlen = flow_read_length(pos);
1905: uint flen = hlen + dlen;
1906: byte *data = pos + hlen;
1907:
1908: if (len < flen)
1909: bgp_parse_error(s, 1);
1910:
1911: /* Validate flow data */
1912: enum flow_validated_state r = flow6_validate(data, dlen);
1913: if (r != FLOW_ST_VALID)
1914: {
1915: log(L_REMOTE "%s: Invalid flow route: %s", s->proto->p.name, flow_validated_state_str(r));
1916: bgp_parse_error(s, 1);
1917: }
1918:
1919: if (data[0] != FLOW_TYPE_DST_PREFIX)
1920: {
1921: log(L_REMOTE "%s: No dst prefix at first pos", s->proto->p.name);
1922: bgp_parse_error(s, 1);
1923: }
1924:
1925: /* Decode dst prefix */
1926: ip6_addr px = IP6_NONE;
1927: uint pxlen = data[1];
1928:
1929: // FIXME: Use some generic function
1930: memcpy(&px, data+2, BYTES(pxlen));
1931: px = ip6_and(ip6_ntoh(px), ip6_mkmask(pxlen));
1932:
1933: /* Prepare the flow */
1934: net_addr *n = alloca(sizeof(struct net_addr_flow6) + flen);
1935: net_fill_flow6(n, px, pxlen, pos, flen);
1936: ADVANCE(pos, len, flen);
1937:
1938: bgp_rte_update(s, n, path_id, a);
1939: }
1940: }
1941:
1942:
1943: static const struct bgp_af_desc bgp_af_table[] = {
1944: {
1945: .afi = BGP_AF_IPV4,
1946: .net = NET_IP4,
1947: .name = "ipv4",
1948: .encode_nlri = bgp_encode_nlri_ip4,
1949: .decode_nlri = bgp_decode_nlri_ip4,
1950: .encode_next_hop = bgp_encode_next_hop_ip,
1951: .decode_next_hop = bgp_decode_next_hop_ip,
1952: .update_next_hop = bgp_update_next_hop_ip,
1953: },
1954: {
1955: .afi = BGP_AF_IPV4_MC,
1956: .net = NET_IP4,
1957: .name = "ipv4-mc",
1958: .encode_nlri = bgp_encode_nlri_ip4,
1959: .decode_nlri = bgp_decode_nlri_ip4,
1960: .encode_next_hop = bgp_encode_next_hop_ip,
1961: .decode_next_hop = bgp_decode_next_hop_ip,
1962: .update_next_hop = bgp_update_next_hop_ip,
1963: },
1964: {
1965: .afi = BGP_AF_IPV4_MPLS,
1966: .net = NET_IP4,
1967: .mpls = 1,
1968: .name = "ipv4-mpls",
1969: .encode_nlri = bgp_encode_nlri_ip4,
1970: .decode_nlri = bgp_decode_nlri_ip4,
1971: .encode_next_hop = bgp_encode_next_hop_ip,
1972: .decode_next_hop = bgp_decode_next_hop_ip,
1973: .update_next_hop = bgp_update_next_hop_ip,
1974: },
1975: {
1976: .afi = BGP_AF_IPV6,
1977: .net = NET_IP6,
1978: .name = "ipv6",
1979: .encode_nlri = bgp_encode_nlri_ip6,
1980: .decode_nlri = bgp_decode_nlri_ip6,
1981: .encode_next_hop = bgp_encode_next_hop_ip,
1982: .decode_next_hop = bgp_decode_next_hop_ip,
1983: .update_next_hop = bgp_update_next_hop_ip,
1984: },
1985: {
1986: .afi = BGP_AF_IPV6_MC,
1987: .net = NET_IP6,
1988: .name = "ipv6-mc",
1989: .encode_nlri = bgp_encode_nlri_ip6,
1990: .decode_nlri = bgp_decode_nlri_ip6,
1991: .encode_next_hop = bgp_encode_next_hop_ip,
1992: .decode_next_hop = bgp_decode_next_hop_ip,
1993: .update_next_hop = bgp_update_next_hop_ip,
1994: },
1995: {
1996: .afi = BGP_AF_IPV6_MPLS,
1997: .net = NET_IP6,
1998: .mpls = 1,
1999: .name = "ipv6-mpls",
2000: .encode_nlri = bgp_encode_nlri_ip6,
2001: .decode_nlri = bgp_decode_nlri_ip6,
2002: .encode_next_hop = bgp_encode_next_hop_ip,
2003: .decode_next_hop = bgp_decode_next_hop_ip,
2004: .update_next_hop = bgp_update_next_hop_ip,
2005: },
2006: {
2007: .afi = BGP_AF_VPN4_MPLS,
2008: .net = NET_VPN4,
2009: .mpls = 1,
2010: .name = "vpn4-mpls",
2011: .encode_nlri = bgp_encode_nlri_vpn4,
2012: .decode_nlri = bgp_decode_nlri_vpn4,
2013: .encode_next_hop = bgp_encode_next_hop_vpn,
2014: .decode_next_hop = bgp_decode_next_hop_vpn,
2015: .update_next_hop = bgp_update_next_hop_ip,
2016: },
2017: {
2018: .afi = BGP_AF_VPN6_MPLS,
2019: .net = NET_VPN6,
2020: .mpls = 1,
2021: .name = "vpn6-mpls",
2022: .encode_nlri = bgp_encode_nlri_vpn6,
2023: .decode_nlri = bgp_decode_nlri_vpn6,
2024: .encode_next_hop = bgp_encode_next_hop_vpn,
2025: .decode_next_hop = bgp_decode_next_hop_vpn,
2026: .update_next_hop = bgp_update_next_hop_ip,
2027: },
2028: {
2029: .afi = BGP_AF_VPN4_MC,
2030: .net = NET_VPN4,
2031: .name = "vpn4-mc",
2032: .encode_nlri = bgp_encode_nlri_vpn4,
2033: .decode_nlri = bgp_decode_nlri_vpn4,
2034: .encode_next_hop = bgp_encode_next_hop_vpn,
2035: .decode_next_hop = bgp_decode_next_hop_vpn,
2036: .update_next_hop = bgp_update_next_hop_ip,
2037: },
2038: {
2039: .afi = BGP_AF_VPN6_MC,
2040: .net = NET_VPN6,
2041: .name = "vpn6-mc",
2042: .encode_nlri = bgp_encode_nlri_vpn6,
2043: .decode_nlri = bgp_decode_nlri_vpn6,
2044: .encode_next_hop = bgp_encode_next_hop_vpn,
2045: .decode_next_hop = bgp_decode_next_hop_vpn,
2046: .update_next_hop = bgp_update_next_hop_ip,
2047: },
2048: {
2049: .afi = BGP_AF_FLOW4,
2050: .net = NET_FLOW4,
2051: .no_igp = 1,
2052: .name = "flow4",
2053: .encode_nlri = bgp_encode_nlri_flow4,
2054: .decode_nlri = bgp_decode_nlri_flow4,
2055: .encode_next_hop = bgp_encode_next_hop_none,
2056: .decode_next_hop = bgp_decode_next_hop_none,
2057: .update_next_hop = bgp_update_next_hop_none,
2058: },
2059: {
2060: .afi = BGP_AF_FLOW6,
2061: .net = NET_FLOW6,
2062: .no_igp = 1,
2063: .name = "flow6",
2064: .encode_nlri = bgp_encode_nlri_flow6,
2065: .decode_nlri = bgp_decode_nlri_flow6,
2066: .encode_next_hop = bgp_encode_next_hop_none,
2067: .decode_next_hop = bgp_decode_next_hop_none,
2068: .update_next_hop = bgp_update_next_hop_none,
2069: },
2070: };
2071:
2072: const struct bgp_af_desc *
2073: bgp_get_af_desc(u32 afi)
2074: {
2075: uint i;
2076: for (i = 0; i < ARRAY_SIZE(bgp_af_table); i++)
2077: if (bgp_af_table[i].afi == afi)
2078: return &bgp_af_table[i];
2079:
2080: return NULL;
2081: }
2082:
2083: static inline uint
2084: bgp_encode_nlri(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
2085: {
2086: return s->channel->desc->encode_nlri(s, buck, buf, end - buf);
2087: }
2088:
2089: static inline uint
2090: bgp_encode_next_hop(struct bgp_write_state *s, eattr *nh, byte *buf)
2091: {
2092: return s->channel->desc->encode_next_hop(s, nh, buf, 255);
2093: }
2094:
2095: void
2096: bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to)
2097: {
2098: s->channel->desc->update_next_hop(s, a, to);
2099: }
2100:
2101: #define MAX_ATTRS_LENGTH (end-buf+BGP_HEADER_LENGTH - 1024)
2102:
2103: static byte *
2104: bgp_create_ip_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
2105: {
2106: /*
2107: * 2 B Withdrawn Routes Length (zero)
2108: * --- IPv4 Withdrawn Routes NLRI (unused)
2109: * 2 B Total Path Attribute Length
2110: * var Path Attributes
2111: * var IPv4 Network Layer Reachability Information
2112: */
2113:
2114: int lr, la;
2115:
2116: la = bgp_encode_attrs(s, buck->eattrs, buf+4, buf + MAX_ATTRS_LENGTH);
2117: if (la < 0)
2118: {
2119: /* Attribute list too long */
2120: bgp_withdraw_bucket(s->channel, buck);
2121: return NULL;
2122: }
2123:
2124: put_u16(buf+0, 0);
2125: put_u16(buf+2, la);
2126:
2127: lr = bgp_encode_nlri(s, buck, buf+4+la, end);
2128:
2129: return buf+4+la+lr;
2130: }
2131:
2132: static byte *
2133: bgp_create_mp_reach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
2134: {
2135: /*
2136: * 2 B IPv4 Withdrawn Routes Length (zero)
2137: * --- IPv4 Withdrawn Routes NLRI (unused)
2138: * 2 B Total Path Attribute Length
2139: * 1 B MP_REACH_NLRI hdr - Attribute Flags
2140: * 1 B MP_REACH_NLRI hdr - Attribute Type Code
2141: * 2 B MP_REACH_NLRI hdr - Length of Attribute Data
2142: * 2 B MP_REACH_NLRI data - Address Family Identifier
2143: * 1 B MP_REACH_NLRI data - Subsequent Address Family Identifier
2144: * 1 B MP_REACH_NLRI data - Length of Next Hop Network Address
2145: * var MP_REACH_NLRI data - Network Address of Next Hop
2146: * 1 B MP_REACH_NLRI data - Reserved (zero)
2147: * var MP_REACH_NLRI data - Network Layer Reachability Information
2148: * var Rest of Path Attributes
2149: * --- IPv4 Network Layer Reachability Information (unused)
2150: */
2151:
2152: int lh, lr, la; /* Lengths of next hop, NLRI and attributes */
2153:
2154: /* Begin of MP_REACH_NLRI atribute */
2155: buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
2156: buf[5] = BA_MP_REACH_NLRI;
2157: put_u16(buf+6, 0); /* Will be fixed later */
2158: put_af3(buf+8, s->channel->afi);
2159: byte *pos = buf+11;
2160:
2161: /* Encode attributes to temporary buffer */
2162: byte *abuf = alloca(MAX_ATTRS_LENGTH);
2163: la = bgp_encode_attrs(s, buck->eattrs, abuf, abuf + MAX_ATTRS_LENGTH);
2164: if (la < 0)
2165: {
2166: /* Attribute list too long */
2167: bgp_withdraw_bucket(s->channel, buck);
2168: return NULL;
2169: }
2170:
2171: /* Encode the next hop */
2172: lh = bgp_encode_next_hop(s, s->mp_next_hop, pos+1);
2173: *pos = lh;
2174: pos += 1+lh;
2175:
2176: /* Reserved field */
2177: *pos++ = 0;
2178:
2179: /* Encode the NLRI */
2180: lr = bgp_encode_nlri(s, buck, pos, end - la);
2181: pos += lr;
2182:
2183: /* End of MP_REACH_NLRI atribute, update data length */
2184: put_u16(buf+6, pos-buf-8);
2185:
2186: /* Copy remaining attributes */
2187: memcpy(pos, abuf, la);
2188: pos += la;
2189:
2190: /* Initial UPDATE fields */
2191: put_u16(buf+0, 0);
2192: put_u16(buf+2, pos-buf-4);
2193:
2194: return pos;
2195: }
2196:
2197: #undef MAX_ATTRS_LENGTH
2198:
2199: static byte *
2200: bgp_create_ip_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
2201: {
2202: /*
2203: * 2 B Withdrawn Routes Length
2204: * var IPv4 Withdrawn Routes NLRI
2205: * 2 B Total Path Attribute Length (zero)
2206: * --- Path Attributes (unused)
2207: * --- IPv4 Network Layer Reachability Information (unused)
2208: */
2209:
2210: uint len = bgp_encode_nlri(s, buck, buf+2, end);
2211:
2212: put_u16(buf+0, len);
2213: put_u16(buf+2+len, 0);
2214:
2215: return buf+4+len;
2216: }
2217:
2218: static byte *
2219: bgp_create_mp_unreach(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, byte *end)
2220: {
2221: /*
2222: * 2 B Withdrawn Routes Length (zero)
2223: * --- IPv4 Withdrawn Routes NLRI (unused)
2224: * 2 B Total Path Attribute Length
2225: * 1 B MP_UNREACH_NLRI hdr - Attribute Flags
2226: * 1 B MP_UNREACH_NLRI hdr - Attribute Type Code
2227: * 2 B MP_UNREACH_NLRI hdr - Length of Attribute Data
2228: * 2 B MP_UNREACH_NLRI data - Address Family Identifier
2229: * 1 B MP_UNREACH_NLRI data - Subsequent Address Family Identifier
2230: * var MP_UNREACH_NLRI data - Network Layer Reachability Information
2231: * --- IPv4 Network Layer Reachability Information (unused)
2232: */
2233:
2234: uint len = bgp_encode_nlri(s, buck, buf+11, end);
2235:
2236: put_u16(buf+0, 0);
2237: put_u16(buf+2, 7+len);
2238:
2239: /* Begin of MP_UNREACH_NLRI atribute */
2240: buf[4] = BAF_OPTIONAL | BAF_EXT_LEN;
2241: buf[5] = BA_MP_UNREACH_NLRI;
2242: put_u16(buf+6, 3+len);
2243: put_af3(buf+8, s->channel->afi);
2244:
2245: return buf+11+len;
2246: }
2247:
2248: static byte *
2249: bgp_create_update(struct bgp_channel *c, byte *buf)
2250: {
2251: struct bgp_proto *p = (void *) c->c.proto;
2252: struct bgp_bucket *buck;
2253: byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
2254: byte *res = NULL;
2255:
2256: again: ;
2257:
2258: /* Initialize write state */
2259: struct bgp_write_state s = {
2260: .proto = p,
2261: .channel = c,
2262: .pool = bgp_linpool,
2263: .mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop,
2264: .as4_session = p->as4_session,
2265: .add_path = c->add_path_tx,
2266: .mpls = c->desc->mpls,
2267: };
2268:
2269: /* Try unreachable bucket */
2270: if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
2271: {
2272: res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
2273: bgp_create_ip_unreach(&s, buck, buf, end):
2274: bgp_create_mp_unreach(&s, buck, buf, end);
2275:
2276: goto done;
2277: }
2278:
2279: /* Try reachable buckets */
2280: if (!EMPTY_LIST(c->bucket_queue))
2281: {
2282: buck = HEAD(c->bucket_queue);
2283:
2284: /* Cleanup empty buckets */
2285: if (EMPTY_LIST(buck->prefixes))
2286: {
2287: bgp_free_bucket(c, buck);
2288: goto again;
2289: }
2290:
2291: res = !s.mp_reach ?
2292: bgp_create_ip_reach(&s, buck, buf, end):
2293: bgp_create_mp_reach(&s, buck, buf, end);
2294:
2295: if (EMPTY_LIST(buck->prefixes))
2296: bgp_free_bucket(c, buck);
2297: else
2298: bgp_defer_bucket(c, buck);
2299:
2300: if (!res)
2301: goto again;
2302:
2303: goto done;
2304: }
2305:
2306: /* No more prefixes to send */
2307: return NULL;
2308:
2309: done:
2310: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
2311: lp_flush(s.pool);
2312:
2313: return res;
2314: }
2315:
2316: static byte *
2317: bgp_create_ip_end_mark(struct bgp_channel *c UNUSED, byte *buf)
2318: {
2319: /* Empty update packet */
2320: put_u32(buf, 0);
2321:
2322: return buf+4;
2323: }
2324:
2325: static byte *
2326: bgp_create_mp_end_mark(struct bgp_channel *c, byte *buf)
2327: {
2328: put_u16(buf+0, 0);
2329: put_u16(buf+2, 6); /* length 4--9 */
2330:
2331: /* Empty MP_UNREACH_NLRI atribute */
2332: buf[4] = BAF_OPTIONAL;
2333: buf[5] = BA_MP_UNREACH_NLRI;
2334: buf[6] = 3; /* Length 7--9 */
2335: put_af3(buf+7, c->afi);
2336:
2337: return buf+10;
2338: }
2339:
2340: static byte *
2341: bgp_create_end_mark(struct bgp_channel *c, byte *buf)
2342: {
2343: struct bgp_proto *p = (void *) c->c.proto;
2344:
2345: BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
2346:
2347: return (c->afi == BGP_AF_IPV4) ?
2348: bgp_create_ip_end_mark(c, buf):
2349: bgp_create_mp_end_mark(c, buf);
2350: }
2351:
2352: static inline void
2353: bgp_rx_end_mark(struct bgp_parse_state *s, u32 afi)
2354: {
2355: struct bgp_proto *p = s->proto;
2356: struct bgp_channel *c = bgp_get_channel(p, afi);
2357:
2358: BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
2359:
2360: if (!c)
2361: DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
2362:
2363: if (c->load_state == BFS_LOADING)
2364: c->load_state = BFS_NONE;
2365:
2366: if (p->p.gr_recovery)
2367: channel_graceful_restart_unlock(&c->c);
2368:
2369: if (c->gr_active)
2370: bgp_graceful_restart_done(c);
2371: }
2372:
2373: static inline void
2374: bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_list *ea, byte *nh, uint nh_len)
2375: {
2376: struct bgp_channel *c = bgp_get_channel(s->proto, afi);
2377: rta *a = NULL;
2378:
2379: if (!c)
2380: DISCARD(BAD_AFI, BGP_AFI(afi), BGP_SAFI(afi));
2381:
2382: s->channel = c;
2383: s->add_path = c->add_path_rx;
2384: s->mpls = c->desc->mpls;
2385:
2386: s->last_id = 0;
2387: s->last_src = s->proto->p.main_source;
2388:
2389: /*
2390: * IPv4 BGP and MP-BGP may be used together in one update, therefore we do not
2391: * add BA_NEXT_HOP in bgp_decode_attrs(), but we add it here independently for
2392: * IPv4 BGP and MP-BGP. We undo the attribute (and possibly others attached by
2393: * decode_next_hop hooks) by restoring a->eattrs afterwards.
2394: */
2395:
2396: if (ea)
2397: {
2398: a = allocz(RTA_MAX_SIZE);
2399:
2400: a->source = RTS_BGP;
2401: a->scope = SCOPE_UNIVERSE;
2402: a->from = s->proto->remote_ip;
2403: a->eattrs = ea;
2404:
2405: c->desc->decode_next_hop(s, nh, nh_len, a);
2406: bgp_finish_attrs(s, a);
2407:
2408: /* Handle withdraw during next hop decoding */
2409: if (s->err_withdraw)
2410: a = NULL;
2411: }
2412:
2413: c->desc->decode_nlri(s, nlri, len, a);
2414:
2415: rta_free(s->cached_rta);
2416: s->cached_rta = NULL;
2417: }
2418:
/*
 * bgp_rx_update - process a received UPDATE message
 * @conn: connection the message arrived on
 * @pkt: start of the message, including the fixed header
 * @len: length of the whole message
 *
 * Splits the message into withdrawn IPv4 NLRI, path attributes and reachable
 * IPv4 NLRI, decodes the attributes, recognizes End-of-RIB markers and passes
 * every non-empty NLRI block to bgp_decode_nlri(). The s.mp_* fields tested
 * below are not set in this function; presumably bgp_decode_attrs() fills them.
 *
 * Parse errors that jump to s.err_jmpbuf are answered with an error of
 * code 3 and the subcode stored in the parse state.
 */
2419: static void
2420: bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
2421: {
2422: struct bgp_proto *p = conn->bgp;
2423: ea_list *ea = NULL;
2424:
2425: BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
2426:
2427: /* Workaround for some BGP implementations that skip initial KEEPALIVE */
2428: if (conn->state == BS_OPENCONFIRM)
2429: bgp_conn_enter_established_state(conn);
2430:
/* UPDATE is only acceptable in Established state; otherwise FSM error (code 5) */
2431: if (conn->state != BS_ESTABLISHED)
2432: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
2433:
/* A valid UPDATE counts as a sign of life: start the hold timer again */
2434: bgp_start_timer(conn->hold_timer, conn->hold_time);
2435:
2436: /* Initialize parse state */
2437: struct bgp_parse_state s = {
2438: .proto = p,
2439: .pool = bgp_linpool,
2440: .as4_session = p->as4_session,
2441: };
2442:
2443: /* Parse error handler */
/* setjmp() returns non-zero when control comes back here through s.err_jmpbuf */
2444: if (setjmp(s.err_jmpbuf))
2445: {
2446: bgp_error(conn, 3, s.err_subcode, NULL, 0);
2447: goto done;
2448: }
2449:
2450: /* Check minimal length */
/* 23 = 19 (position of the first length field) + two 2-byte length fields */
2451: if (len < 23)
2452: { bgp_error(conn, 1, 2, pkt+16, 2); return; }
2453:
2454: /* Skip fixed header */
2455: uint pos = 19;
2456:
2457: /*
2458: * UPDATE message format
2459: *
2460: * 2 B IPv4 Withdrawn Routes Length
2461: * var IPv4 Withdrawn Routes NLRI
2462: * 2 B Total Path Attribute Length
2463: * var Path Attributes
2464: * var IPv4 Reachable Routes NLRI
2465: */
2466:
2467: s.ip_unreach_len = get_u16(pkt + pos);
2468: s.ip_unreach_nlri = pkt + pos + 2;
2469: pos += 2 + s.ip_unreach_len;
2470:
/* The attribute length field must still lie inside the message */
2471: if (pos + 2 > len)
2472: bgp_parse_error(&s, 1);
2473:
2474: s.attr_len = get_u16(pkt + pos);
2475: s.attrs = pkt + pos + 2;
2476: pos += 2 + s.attr_len;
2477:
/* The attribute block must not run past the end of the message */
2478: if (pos > len)
2479: bgp_parse_error(&s, 1);
2480:
/* Whatever remains after the attributes is the reachable IPv4 NLRI */
2481: s.ip_reach_len = len - pos;
2482: s.ip_reach_nlri = pkt + pos;
2483:
2484:
2485: if (s.attr_len)
2486: ea = bgp_decode_attrs(&s, s.attrs, s.attr_len);
2487: else
2488: ea = NULL;
2489:
2490: /* Check for End-of-RIB marker */
2491: if (!s.attr_len && !s.ip_unreach_len && !s.ip_reach_len)
2492: { bgp_rx_end_mark(&s, BGP_AF_IPV4); goto done; }
2493:
2494: /* Check for MP End-of-RIB marker */
2495: if ((s.attr_len < 8) && !s.ip_unreach_len && !s.ip_reach_len &&
2496: !s.mp_reach_len && !s.mp_unreach_len && s.mp_unreach_af)
2497: { bgp_rx_end_mark(&s, s.mp_unreach_af); goto done; }
2498:
/* Withdrawals (no attributes passed) are processed before announcements */
2499: if (s.ip_unreach_len)
2500: bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_unreach_nlri, s.ip_unreach_len, NULL, NULL, 0);
2501:
2502: if (s.mp_unreach_len)
2503: bgp_decode_nlri(&s, s.mp_unreach_af, s.mp_unreach_nlri, s.mp_unreach_len, NULL, NULL, 0);
2504:
2505: if (s.ip_reach_len)
2506: bgp_decode_nlri(&s, BGP_AF_IPV4, s.ip_reach_nlri, s.ip_reach_len,
2507: ea, s.ip_next_hop_data, s.ip_next_hop_len);
2508:
2509: if (s.mp_reach_len)
2510: bgp_decode_nlri(&s, s.mp_reach_af, s.mp_reach_nlri, s.mp_reach_len,
2511: ea, s.mp_next_hop_data, s.mp_next_hop_len);
2512:
/* Common exit: release the cached rta and flush the parse pool */
2513: done:
2514: rta_free(s.cached_rta);
2515: lp_flush(s.pool);
2516: return;
2517: }
2518:
2519: static uint
2520: bgp_find_update_afi(byte *pos, uint len)
2521: {
2522: /*
2523: * This is stripped-down version of bgp_rx_update(), bgp_decode_attrs() and
2524: * bgp_decode_mp_[un]reach_nlri() used by MRT code in order to find out which
2525: * AFI/SAFI is associated with incoming UPDATE. Returns 0 for framing errors.
2526: */
2527: if (len < 23)
2528: return 0;
2529:
2530: /* Assume there is no withrawn NLRI, read lengths and move to attribute list */
2531: uint wlen = get_u16(pos + 19);
2532: uint alen = get_u16(pos + 21);
2533: ADVANCE(pos, len, 23);
2534:
2535: /* Either non-zero withdrawn NLRI, non-zero reachable NLRI, or IPv4 End-of-RIB */
2536: if ((wlen != 0) || (alen < len) || !alen)
2537: return BGP_AF_IPV4;
2538:
2539: if (alen > len)
2540: return 0;
2541:
2542: /* Process attribute list (alen == len) */
2543: while (len)
2544: {
2545: if (len < 2)
2546: return 0;
2547:
2548: uint flags = pos[0];
2549: uint code = pos[1];
2550: ADVANCE(pos, len, 2);
2551:
2552: uint ll = !(flags & BAF_EXT_LEN) ? 1 : 2;
2553: if (len < ll)
2554: return 0;
2555:
2556: /* Read attribute length and move to attribute body */
2557: alen = (ll == 1) ? get_u8(pos) : get_u16(pos);
2558: ADVANCE(pos, len, ll);
2559:
2560: if (len < alen)
2561: return 0;
2562:
2563: /* Found MP NLRI */
2564: if ((code == BA_MP_REACH_NLRI) || (code == BA_MP_UNREACH_NLRI))
2565: {
2566: if (alen < 3)
2567: return 0;
2568:
2569: return BGP_AF(get_u16(pos), pos[2]);
2570: }
2571:
2572: /* Move to the next attribute */
2573: ADVANCE(pos, len, alen);
2574: }
2575:
2576: /* No basic or MP NLRI, but there are some attributes -> error */
2577: return 0;
2578: }
2579:
2580:
2581: /*
2582: * ROUTE-REFRESH
2583: */
2584:
2585: static inline byte *
2586: bgp_create_route_refresh(struct bgp_channel *c, byte *buf)
2587: {
2588: struct bgp_proto *p = (void *) c->c.proto;
2589:
2590: BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
2591:
2592: /* Original route refresh request, RFC 2918 */
2593: put_af4(buf, c->afi);
2594: buf[2] = BGP_RR_REQUEST;
2595:
2596: return buf+4;
2597: }
2598:
2599: static inline byte *
2600: bgp_create_begin_refresh(struct bgp_channel *c, byte *buf)
2601: {
2602: struct bgp_proto *p = (void *) c->c.proto;
2603:
2604: BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
2605:
2606: /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
2607: put_af4(buf, c->afi);
2608: buf[2] = BGP_RR_BEGIN;
2609:
2610: return buf+4;
2611: }
2612:
2613: static inline byte *
2614: bgp_create_end_refresh(struct bgp_channel *c, byte *buf)
2615: {
2616: struct bgp_proto *p = (void *) c->c.proto;
2617:
2618: BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
2619:
2620: /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
2621: put_af4(buf, c->afi);
2622: buf[2] = BGP_RR_END;
2623:
2624: return buf+4;
2625: }
2626:
2627: static void
2628: bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
2629: {
2630: struct bgp_proto *p = conn->bgp;
2631:
2632: if (conn->state != BS_ESTABLISHED)
2633: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
2634:
2635: if (!conn->local_caps->route_refresh)
2636: { bgp_error(conn, 1, 3, pkt+18, 1); return; }
2637:
2638: if (len < (BGP_HEADER_LENGTH + 4))
2639: { bgp_error(conn, 1, 2, pkt+16, 2); return; }
2640:
2641: if (len > (BGP_HEADER_LENGTH + 4))
2642: { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
2643:
2644: struct bgp_channel *c = bgp_get_channel(p, get_af4(pkt+19));
2645: if (!c)
2646: {
2647: log(L_WARN "%s: Got ROUTE-REFRESH subtype %u for AF %u.%u, ignoring",
2648: p->p.name, pkt[21], get_u16(pkt+19), pkt[22]);
2649: return;
2650: }
2651:
2652: /* RFC 7313 redefined reserved field as RR message subtype */
2653: uint subtype = p->enhanced_refresh ? pkt[21] : BGP_RR_REQUEST;
2654:
2655: switch (subtype)
2656: {
2657: case BGP_RR_REQUEST:
2658: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
2659: channel_request_feeding(&c->c);
2660: break;
2661:
2662: case BGP_RR_BEGIN:
2663: BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
2664: bgp_refresh_begin(c);
2665: break;
2666:
2667: case BGP_RR_END:
2668: BGP_TRACE(D_PACKETS, "Got END-OF-RR");
2669: bgp_refresh_end(c);
2670: break;
2671:
2672: default:
2673: log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
2674: p->p.name, subtype);
2675: break;
2676: }
2677: }
2678:
2679: static inline struct bgp_channel *
2680: bgp_get_channel_to_send(struct bgp_proto *p, struct bgp_conn *conn)
2681: {
2682: uint i = conn->last_channel;
2683:
2684: /* Try the last channel, but at most several times */
2685: if ((conn->channels_to_send & (1 << i)) &&
2686: (conn->last_channel_count < 16))
2687: goto found;
2688:
2689: /* Find channel with non-zero channels_to_send */
2690: do
2691: {
2692: i++;
2693: if (i >= p->channel_count)
2694: i = 0;
2695: }
2696: while (! (conn->channels_to_send & (1 << i)));
2697:
2698: /* Use that channel */
2699: conn->last_channel = i;
2700: conn->last_channel_count = 0;
2701:
2702: found:
2703: conn->last_channel_count++;
2704: return p->channel_map[i];
2705: }
2706:
2707: static inline int
2708: bgp_send(struct bgp_conn *conn, uint type, uint len)
2709: {
2710: sock *sk = conn->sk;
2711: byte *buf = sk->tbuf;
2712:
2713: memset(buf, 0xff, 16); /* Marker */
2714: put_u16(buf+16, len);
2715: buf[18] = type;
2716:
2717: return sk_send(sk, len);
2718: }
2719:
2720: /**
2721: * bgp_fire_tx - transmit packets
2722: * @conn: connection
2723: *
2724: * Whenever the transmit buffers of the underlying TCP connection
2725: * are free and we have any packets queued for sending, the socket functions
2726: * call bgp_fire_tx() which takes care of selecting the highest priority packet
2727: * queued (Notification > Open > Keepalive > per-channel packets), assembling its header
2728: * and body and sending it to the connection.
 *
 * At most one packet is sent per call. Returns the result of bgp_send() for
 * that packet, or 0 when there is no socket, the connection was moved to idle
 * state, or nothing was left to send.
2729: */
2730: static int
2731: bgp_fire_tx(struct bgp_conn *conn)
2732: {
2733: struct bgp_proto *p = conn->bgp;
2734: struct bgp_channel *c;
2735: byte *buf, *pkt, *end;
2736: uint s;
2737:
2738: if (!conn->sk)
2739: return 0;
2740:
/* Packet bodies are built at pkt; bgp_send() fills the header at buf afterwards */
2741: buf = conn->sk->tbuf;
2742: pkt = buf + BGP_HEADER_LENGTH;
2743: s = conn->packets_to_send;
2744:
2745: if (s & (1 << PKT_SCHEDULE_CLOSE))
2746: {
2747: /* We can finally close connection and enter idle state */
2748: bgp_conn_enter_idle_state(conn);
2749: return 0;
2750: }
2751: if (s & (1 << PKT_NOTIFICATION))
2752: {
/* Everything else queued is dropped; only the close remains scheduled after the notification */
2753: conn->packets_to_send = 1 << PKT_SCHEDULE_CLOSE;
2754: end = bgp_create_notification(conn, pkt);
2755: return bgp_send(conn, PKT_NOTIFICATION, end - buf);
2756: }
2757: else if (s & (1 << PKT_OPEN))
2758: {
2759: conn->packets_to_send &= ~(1 << PKT_OPEN);
2760: end = bgp_create_open(conn, pkt);
2761: return bgp_send(conn, PKT_OPEN, end - buf);
2762: }
2763: else if (s & (1 << PKT_KEEPALIVE))
2764: {
/* A keepalive has no body: the packet is just the header */
2765: conn->packets_to_send &= ~(1 << PKT_KEEPALIVE);
2766: BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
2767: bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
2768: return bgp_send(conn, PKT_KEEPALIVE, BGP_HEADER_LENGTH);
2769: }
/* No connection-level packet pending: serve the channels until one yields a packet */
2770: else while (conn->channels_to_send)
2771: {
2772: c = bgp_get_channel_to_send(p, conn);
2773: s = c->packets_to_send;
2774:
2775: if (s & (1 << PKT_ROUTE_REFRESH))
2776: {
2777: c->packets_to_send &= ~(1 << PKT_ROUTE_REFRESH);
2778: end = bgp_create_route_refresh(c, pkt);
2779: return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
2780: }
2781: else if (s & (1 << PKT_BEGIN_REFRESH))
2782: {
2783: /* BoRR is a subtype of RR, but uses separate bit in packets_to_send */
2784: c->packets_to_send &= ~(1 << PKT_BEGIN_REFRESH);
2785: end = bgp_create_begin_refresh(c, pkt);
2786: return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
2787: }
2788: else if (s & (1 << PKT_UPDATE))
2789: {
/* The PKT_UPDATE bit stays set until bgp_create_update() has nothing more to send */
2790: end = bgp_create_update(c, pkt);
2791: if (end)
2792: return bgp_send(conn, PKT_UPDATE, end - buf);
2793:
2794: /* No update to send, perhaps we need to send End-of-RIB or EoRR */
2795: c->packets_to_send = 0;
2796: conn->channels_to_send &= ~(1 << c->index);
2797:
2798: if (c->feed_state == BFS_LOADED)
2799: {
2800: c->feed_state = BFS_NONE;
2801: end = bgp_create_end_mark(c, pkt);
2802: return bgp_send(conn, PKT_UPDATE, end - buf);
2803: }
2804:
2805: else if (c->feed_state == BFS_REFRESHED)
2806: {
2807: c->feed_state = BFS_NONE;
2808: end = bgp_create_end_refresh(c, pkt);
2809: return bgp_send(conn, PKT_ROUTE_REFRESH, end - buf);
2810: }
2811: }
2812: else if (s)
2813: bug("Channel packets_to_send: %x", s);
2814:
/* This channel produced no packet: mark it idle and try the next one */
2815: c->packets_to_send = 0;
2816: conn->channels_to_send &= ~(1 << c->index);
2817: }
2818:
2819: return 0;
2820: }
2821:
2822: /**
2823: * bgp_schedule_packet - schedule a packet for transmission
2824: * @conn: connection
2825: * @c: channel
2826: * @type: packet type
2827: *
2828: * Schedule a packet of type @type to be sent as soon as possible.
2829: */
2830: void
2831: bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type)
2832: {
2833: ASSERT(conn->sk);
2834:
2835: DBG("BGP: Scheduling packet type %d\n", type);
2836:
2837: if (c)
2838: {
2839: if (! conn->channels_to_send)
2840: {
2841: conn->last_channel = c->index;
2842: conn->last_channel_count = 0;
2843: }
2844:
2845: c->packets_to_send |= 1 << type;
2846: conn->channels_to_send |= 1 << c->index;
2847: }
2848: else
2849: conn->packets_to_send |= 1 << type;
2850:
2851: if ((conn->sk->tpos == conn->sk->tbuf) && !ev_active(conn->tx_ev))
2852: ev_schedule(conn->tx_ev);
2853: }
2854: void
2855: bgp_kick_tx(void *vconn)
2856: {
2857: struct bgp_conn *conn = vconn;
2858:
2859: DBG("BGP: kicking TX\n");
2860: uint max = 1024;
2861: while (--max && (bgp_fire_tx(conn) > 0))
2862: ;
2863:
2864: if (!max && !ev_active(conn->tx_ev))
2865: ev_schedule(conn->tx_ev);
2866: }
2867:
2868: void
2869: bgp_tx(sock *sk)
2870: {
2871: struct bgp_conn *conn = sk->data;
2872:
2873: DBG("BGP: TX hook\n");
2874: uint max = 1024;
2875: while (--max && (bgp_fire_tx(conn) > 0))
2876: ;
2877:
2878: if (!max && !ev_active(conn->tx_ev))
2879: ev_schedule(conn->tx_ev);
2880: }
2881:
2882:
2883: static struct {
2884: byte major, minor;
2885: byte *msg;
2886: } bgp_msg_table[] = {
2887: { 1, 0, "Invalid message header" },
2888: { 1, 1, "Connection not synchronized" },
2889: { 1, 2, "Bad message length" },
2890: { 1, 3, "Bad message type" },
2891: { 2, 0, "Invalid OPEN message" },
2892: { 2, 1, "Unsupported version number" },
2893: { 2, 2, "Bad peer AS" },
2894: { 2, 3, "Bad BGP identifier" },
2895: { 2, 4, "Unsupported optional parameter" },
2896: { 2, 5, "Authentication failure" },
2897: { 2, 6, "Unacceptable hold time" },
2898: { 2, 7, "Required capability missing" }, /* [RFC5492] */
2899: { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
2900: { 3, 0, "Invalid UPDATE message" },
2901: { 3, 1, "Malformed attribute list" },
2902: { 3, 2, "Unrecognized well-known attribute" },
2903: { 3, 3, "Missing mandatory attribute" },
2904: { 3, 4, "Invalid attribute flags" },
2905: { 3, 5, "Invalid attribute length" },
2906: { 3, 6, "Invalid ORIGIN attribute" },
2907: { 3, 7, "AS routing loop" }, /* Deprecated */
2908: { 3, 8, "Invalid NEXT_HOP attribute" },
2909: { 3, 9, "Optional attribute error" },
2910: { 3, 10, "Invalid network field" },
2911: { 3, 11, "Malformed AS_PATH" },
2912: { 4, 0, "Hold timer expired" },
2913: { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
2914: { 5, 1, "Unexpected message in OpenSent state" },
2915: { 5, 2, "Unexpected message in OpenConfirm state" },
2916: { 5, 3, "Unexpected message in Established state" },
2917: { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
2918: { 6, 1, "Maximum number of prefixes reached" },
2919: { 6, 2, "Administrative shutdown" },
2920: { 6, 3, "Peer de-configured" },
2921: { 6, 4, "Administrative reset" },
2922: { 6, 5, "Connection rejected" },
2923: { 6, 6, "Other configuration change" },
2924: { 6, 7, "Connection collision resolution" },
2925: { 6, 8, "Out of Resources" },
2926: { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
2927: { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
2928: };
2929:
2930: /**
2931: * bgp_error_dsc - return BGP error description
2932: * @code: BGP error code
2933: * @subcode: BGP error subcode
2934: *
2935: * bgp_error_dsc() returns error description for BGP errors
2936: * which might be static string or given temporary buffer.
2937: */
2938: const char *
2939: bgp_error_dsc(uint code, uint subcode)
2940: {
2941: static char buff[32];
2942: uint i;
2943:
2944: for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
2945: if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
2946: return bgp_msg_table[i].msg;
2947:
2948: bsprintf(buff, "Unknown error %u.%u", code, subcode);
2949: return buff;
2950: }
2951:
2952: /* RFC 8203 - shutdown communication message */
2953: static int
2954: bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
2955: {
2956: byte *msg = data + 1;
2957: uint msg_len = data[0];
2958: uint i;
2959:
2960: /* Handle zero length message */
2961: if (msg_len == 0)
2962: return 1;
2963:
2964: /* Handle proper message */
2965: if (msg_len + 1 > len)
2966: return 0;
2967:
2968: /* Some elementary cleanup */
2969: for (i = 0; i < msg_len; i++)
2970: if (msg[i] < ' ')
2971: msg[i] = ' ';
2972:
2973: proto_set_message(&p->p, msg, msg_len);
2974: *bp += bsprintf(*bp, ": \"%s\"", p->p.message);
2975: return 1;
2976: }
2977:
2978: void
2979: bgp_log_error(struct bgp_proto *p, u8 class, char *msg, uint code, uint subcode, byte *data, uint len)
2980: {
2981: byte argbuf[256+16], *t = argbuf;
2982: uint i;
2983:
2984: /* Don't report Cease messages generated by myself */
2985: if (code == 6 && class == BE_BGP_TX)
2986: return;
2987:
2988: /* Reset shutdown message */
2989: if ((code == 6) && ((subcode == 2) || (subcode == 4)))
2990: proto_set_message(&p->p, NULL, 0);
2991:
2992: if (len)
2993: {
2994: /* Bad peer AS - we would like to print the AS */
2995: if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
2996: {
2997: t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
2998: goto done;
2999: }
3000:
3001: /* RFC 8203 - shutdown communication */
3002: if (((code == 6) && ((subcode == 2) || (subcode == 4))))
3003: if (bgp_handle_message(p, data, len, &t))
3004: goto done;
3005:
3006: *t++ = ':';
3007: *t++ = ' ';
3008: if (len > 16)
3009: len = 16;
3010: for (i=0; i<len; i++)
3011: t += bsprintf(t, "%02x", data[i]);
3012: }
3013:
3014: done:
3015: *t = 0;
3016: const byte *dsc = bgp_error_dsc(code, subcode);
3017: log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, dsc, argbuf);
3018: }
3019:
3020: static void
3021: bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
3022: {
3023: struct bgp_proto *p = conn->bgp;
3024:
3025: if (len < 21)
3026: { bgp_error(conn, 1, 2, pkt+16, 2); return; }
3027:
3028: uint code = pkt[19];
3029: uint subcode = pkt[20];
3030: int err = (code != 6);
3031:
3032: bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
3033: bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
3034:
3035: bgp_conn_enter_close_state(conn);
3036: bgp_schedule_packet(conn, NULL, PKT_SCHEDULE_CLOSE);
3037:
3038: if (err)
3039: {
3040: bgp_update_startup_delay(p);
3041: bgp_stop(p, 0, NULL, 0);
3042: }
3043: else
3044: {
3045: uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0);
3046: if (p->cf->disable_after_cease & subcode_bit)
3047: {
3048: log(L_INFO "%s: Disabled after Cease notification", p->p.name);
3049: p->startup_delay = 0;
3050: p->p.disabled = 1;
3051: }
3052: }
3053: }
3054:
3055: static void
3056: bgp_rx_keepalive(struct bgp_conn *conn)
3057: {
3058: struct bgp_proto *p = conn->bgp;
3059:
3060: BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
3061: bgp_start_timer(conn->hold_timer, conn->hold_time);
3062:
3063: if (conn->state == BS_OPENCONFIRM)
3064: { bgp_conn_enter_established_state(conn); return; }
3065:
3066: if (conn->state != BS_ESTABLISHED)
3067: bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
3068: }
3069:
3070:
3071: /**
3072: * bgp_rx_packet - handle a received packet
3073: * @conn: BGP connection
3074: * @pkt: start of the packet
3075: * @len: packet size
3076: *
3077: * bgp_rx_packet() takes a newly received packet and calls the corresponding
3078: * packet handler according to the packet type.
3079: */
3080: static void
3081: bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
3082: {
3083: byte type = pkt[18];
3084:
3085: DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
3086:
3087: if (conn->bgp->p.mrtdump & MD_MESSAGES)
3088: bgp_dump_message(conn, pkt, len);
3089:
3090: switch (type)
3091: {
3092: case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
3093: case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
3094: case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
3095: case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
3096: case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
3097: default: bgp_error(conn, 1, 3, pkt+18, 1);
3098: }
3099: }
3100:
3101: /**
3102: * bgp_rx - handle received data
3103: * @sk: socket
3104: * @size: amount of data received
3105: *
3106: * bgp_rx() is called by the socket layer whenever new data arrive from
3107: * the underlying TCP connection. It assembles the data fragments to packets,
3108: * checks their headers and framing and passes complete packets to
3109: * bgp_rx_packet().
3110: */
3111: int
3112: bgp_rx(sock *sk, uint size)
3113: {
3114: struct bgp_conn *conn = sk->data;
3115: byte *pkt_start = sk->rbuf;
3116: byte *end = pkt_start + size;
3117: uint i, len;
3118:
3119: DBG("BGP: RX hook: Got %d bytes\n", size);
3120: while (end >= pkt_start + BGP_HEADER_LENGTH)
3121: {
3122: if ((conn->state == BS_CLOSE) || (conn->sk != sk))
3123: return 0;
3124: for(i=0; i<16; i++)
3125: if (pkt_start[i] != 0xff)
3126: {
3127: bgp_error(conn, 1, 1, NULL, 0);
3128: break;
3129: }
3130: len = get_u16(pkt_start+16);
3131: if ((len < BGP_HEADER_LENGTH) || (len > bgp_max_packet_length(conn)))
3132: {
3133: bgp_error(conn, 1, 2, pkt_start+16, 2);
3134: break;
3135: }
3136: if (end < pkt_start + len)
3137: break;
3138: bgp_rx_packet(conn, pkt_start, len);
3139: pkt_start += len;
3140: }
3141: if (pkt_start != sk->rbuf)
3142: {
3143: memmove(sk->rbuf, pkt_start, end - pkt_start);
3144: sk->rpos = sk->rbuf + (end - pkt_start);
3145: }
3146: return 0;
3147: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>