Annotation of embedaddon/bird/proto/bgp/packets.c, revision 1.1.1.2
1.1 misho 1: /*
2: * BIRD -- BGP Packet Processing
3: *
4: * (c) 2000 Martin Mares <mj@ucw.cz>
5: *
6: * Can be freely distributed and used under the terms of the GNU GPL.
7: */
8:
9: #undef LOCAL_DEBUG
10:
11: #include "nest/bird.h"
12: #include "nest/iface.h"
13: #include "nest/protocol.h"
14: #include "nest/route.h"
15: #include "nest/attrs.h"
1.1.1.2 ! misho 16: #include "proto/mrt/mrt.h"
1.1 misho 17: #include "conf/conf.h"
18: #include "lib/unaligned.h"
19: #include "lib/socket.h"
20:
21: #include "nest/cli.h"
22:
23: #include "bgp.h"
24:
25:
/*
 * Subtype values written as the third byte of a ROUTE-REFRESH message body
 * by bgp_create_route_refresh(), bgp_create_begin_refresh() and
 * bgp_create_end_refresh() below.
 */
26: #define BGP_RR_REQUEST 0
27: #define BGP_RR_BEGIN 1
28: #define BGP_RR_END 2
29:
30:
/*
 * Token-bucket filters initialised with the default log limits.
 * rl_snd_update is passed to BGP_TRACE_RL() when tracing sent UPDATEs;
 * rl_rcv_update is presumably its receive-side counterpart (its use is
 * not visible in this part of the file).
 */
31: static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
32: static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
33:
34: /* Table mapping connection state -> RFC 6608 FSM error subcode; states not listed map to 0 */
35: static byte fsm_err_subcode[BS_MAX] = {
36: [BS_OPENSENT] = 1,
37: [BS_OPENCONFIRM] = 2,
38: [BS_ESTABLISHED] = 3
39: };
40:
1.1.1.2 ! misho 41: static void
! 42: init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
1.1 misho 43: {
44: struct bgp_proto *p = conn->bgp;
1.1.1.2 ! misho 45: int p_ok = conn->state >= BS_OPENCONFIRM;
1.1 misho 46:
1.1.1.2 ! misho 47: memset(d, 0, sizeof(struct mrt_bgp_data));
! 48: d->peer_as = p->remote_as;
! 49: d->local_as = p->local_as;
! 50: d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
! 51: d->af = BGP_AF;
! 52: d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
! 53: d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
! 54: d->as4 = p_ok ? p->as4_session : 0;
! 55: d->add_path = p_ok ? p->add_path_rx : 0;
1.1 misho 56: }
57:
58: static void
1.1.1.2 ! misho 59: bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len)
1.1 misho 60: {
1.1.1.2 ! misho 61: struct mrt_bgp_data d;
! 62: init_mrt_bgp_data(conn, &d);
1.1 misho 63:
1.1.1.2 ! misho 64: d.message = pkt;
! 65: d.msg_len = len;
1.1 misho 66:
1.1.1.2 ! misho 67: mrt_dump_bgp_message(&d);
1.1 misho 68: }
69:
70: void
1.1.1.2 ! misho 71: bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new)
1.1 misho 72: {
1.1.1.2 ! misho 73: struct mrt_bgp_data d;
! 74: init_mrt_bgp_data(conn, &d);
! 75:
! 76: d.old_state = old;
! 77: d.new_state = new;
1.1 misho 78:
1.1.1.2 ! misho 79: mrt_dump_bgp_state_change(&d);
1.1 misho 80: }
81:
82: static byte *
83: bgp_create_notification(struct bgp_conn *conn, byte *buf)
84: {
85: struct bgp_proto *p = conn->bgp;
86:
87: BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
88: buf[0] = conn->notify_code;
89: buf[1] = conn->notify_subcode;
90: memcpy(buf+2, conn->notify_data, conn->notify_size);
91: return buf + 2 + conn->notify_size;
92: }
93:
94: #ifdef IPV6
95: static byte *
96: bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
97: {
98: *buf++ = 1; /* Capability 1: Multiprotocol extensions */
99: *buf++ = 4; /* Capability data length */
100: *buf++ = 0; /* We support AF IPv6 */
101: *buf++ = BGP_AF_IPV6;
102: *buf++ = 0; /* RFU */
103: *buf++ = 1; /* and SAFI 1 */
104: return buf;
105: }
106:
107: #else
108:
109: static byte *
110: bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
111: {
112: *buf++ = 1; /* Capability 1: Multiprotocol extensions */
113: *buf++ = 4; /* Capability data length */
114: *buf++ = 0; /* We support AF IPv4 */
115: *buf++ = BGP_AF_IPV4;
116: *buf++ = 0; /* RFU */
117: *buf++ = 1; /* and SAFI 1 */
118: return buf;
119: }
120: #endif
121:
122: static byte *
123: bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
124: {
125: *buf++ = 2; /* Capability 2: Support for route refresh */
126: *buf++ = 0; /* Capability data length */
127: return buf;
128: }
129:
130: static byte *
131: bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf)
132: {
133: *buf++ = 6; /* Capability 6: Support for extended messages */
134: *buf++ = 0; /* Capability data length */
135: return buf;
136: }
137:
138: static byte *
139: bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
140: {
141: *buf++ = 64; /* Capability 64: Support for graceful restart */
142: *buf++ = 6; /* Capability data length */
143:
144: put_u16(buf, p->cf->gr_time);
145: if (p->p.gr_recovery)
146: buf[0] |= BGP_GRF_RESTART;
147: buf += 2;
148:
149: *buf++ = 0; /* Appropriate AF */
150: *buf++ = BGP_AF;
151: *buf++ = 1; /* and SAFI 1 */
152: *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0;
153:
154: return buf;
155: }
156:
157: static byte *
158: bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf)
159: {
160: *buf++ = 64; /* Capability 64: Support for graceful restart */
161: *buf++ = 2; /* Capability data length */
162: put_u16(buf, 0);
163: return buf + 2;
164: }
165:
166: static byte *
167: bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
168: {
169: *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
170: *buf++ = 4; /* Capability data length */
171: put_u32(buf, p->local_as);
172: return buf + 4;
173: }
174:
175: static byte *
176: bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
177: {
178: *buf++ = 69; /* Capability 69: Support for ADD-PATH */
179: *buf++ = 4; /* Capability data length */
180:
181: *buf++ = 0; /* Appropriate AF */
182: *buf++ = BGP_AF;
183: *buf++ = 1; /* SAFI 1 */
184:
185: *buf++ = p->cf->add_path;
186:
187: return buf;
188: }
189:
190: static byte *
191: bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
192: {
193: *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
194: *buf++ = 0; /* Capability data length */
195: return buf;
196: }
197:
1.1.1.2 ! misho 198: static byte *
! 199: bgp_put_cap_llgr1(struct bgp_proto *p, byte *buf)
! 200: {
! 201: *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */
! 202: *buf++ = 7; /* Capability data length */
! 203:
! 204: *buf++ = 0; /* Appropriate AF */
! 205: *buf++ = BGP_AF;
! 206: *buf++ = 1; /* and SAFI 1 */
! 207:
! 208: /* Next is 8bit flags and 24bit time */
! 209: put_u32(buf, p->cf->llgr_time);
! 210: buf[0] = p->p.gr_recovery ? BGP_LLGRF_FORWARDING : 0;
! 211: buf += 4;
! 212:
! 213: return buf;
! 214: }
! 215:
! 216: static byte *
! 217: bgp_put_cap_llgr2(struct bgp_proto *p UNUSED, byte *buf)
! 218: {
! 219: *buf++ = 71; /* Capability 71: Support for long-lived graceful restart */
! 220: *buf++ = 0; /* Capability data length */
! 221: return buf;
! 222: }
! 223:
1.1 misho 224:
225: static byte *
226: bgp_create_open(struct bgp_conn *conn, byte *buf)
227: {
228: struct bgp_proto *p = conn->bgp;
229: byte *cap;
230: int cap_len;
231:
232: BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
233: BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
234: buf[0] = BGP_VERSION;
235: put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
236: put_u16(buf+3, p->cf->hold_time);
237: put_u32(buf+5, p->local_id);
238:
239: if (conn->start_state == BSS_CONNECT_NOCAP)
240: {
241: BGP_TRACE(D_PACKETS, "Skipping capabilities");
242: buf[9] = 0;
243: return buf + 10;
244: }
245:
246: /* Skipped 3 B for length field and Capabilities parameter header */
247: cap = buf + 12;
248:
249: #ifndef IPV6
250: if (p->cf->advertise_ipv4)
251: cap = bgp_put_cap_ipv4(p, cap);
252: #endif
253:
254: #ifdef IPV6
255: cap = bgp_put_cap_ipv6(p, cap);
256: #endif
257:
258: if (p->cf->enable_refresh)
259: cap = bgp_put_cap_rr(p, cap);
260:
261: if (p->cf->gr_mode == BGP_GR_ABLE)
262: cap = bgp_put_cap_gr1(p, cap);
263: else if (p->cf->gr_mode == BGP_GR_AWARE)
264: cap = bgp_put_cap_gr2(p, cap);
265:
266: if (p->cf->enable_as4)
267: cap = bgp_put_cap_as4(p, cap);
268:
269: if (p->cf->add_path)
270: cap = bgp_put_cap_add_path(p, cap);
271:
272: if (p->cf->enable_refresh)
273: cap = bgp_put_cap_err(p, cap);
274:
275: if (p->cf->enable_extended_messages)
276: cap = bgp_put_cap_ext_msg(p, cap);
277:
1.1.1.2 ! misho 278: if (p->cf->llgr_mode == BGP_LLGR_ABLE)
! 279: cap = bgp_put_cap_llgr1(p, cap);
! 280: else if (p->cf->llgr_mode == BGP_LLGR_AWARE)
! 281: cap = bgp_put_cap_llgr2(p, cap);
! 282:
1.1 misho 283: cap_len = cap - buf - 12;
284: if (cap_len > 0)
285: {
286: buf[9] = cap_len + 2; /* Optional params len */
287: buf[10] = 2; /* Option: Capability list */
288: buf[11] = cap_len; /* Option length */
289: return cap;
290: }
291: else
292: {
293: buf[9] = 0; /* No optional parameters */
294: return buf + 10;
295: }
296: }
297:
298: static uint
299: bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains)
300: {
301: byte *start = w;
302: ip_addr a;
303: int bytes;
304:
305: while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr))))
306: {
307: struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
308: DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
309:
310: if (p->add_path_tx)
311: {
312: put_u32(w, px->path_id);
313: w += 4;
314: remains -= 4;
315: }
316:
317: *w++ = px->n.pxlen;
318: bytes = (px->n.pxlen + 7) / 8;
319: a = px->n.prefix;
320: ipa_hton(a);
321: memcpy(w, &a, bytes);
322: w += bytes;
323: remains -= bytes + 1;
324: rem_node(&px->bucket_node);
325: bgp_free_prefix(p, px);
326: // fib_delete(&p->prefix_fib, px);
327: }
328: return w - start;
329: }
330:
331: static void
332: bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
333: {
334: while (!EMPTY_LIST(buck->prefixes))
335: {
336: struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
337: log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
338: rem_node(&px->bucket_node);
339: bgp_free_prefix(p, px);
340: // fib_delete(&p->prefix_fib, px);
341: }
342: }
343:
344: #ifndef IPV6 /* IPv4 version */
345:
/*
 * IPv4 variant: write the body of one UPDATE message to @buf.
 *
 * Body layout as produced here: 2-byte withdrawn routes length, withdrawn
 * prefixes, 2-byte path attributes length, path attributes, NLRI prefixes.
 *
 * If the withdraw bucket holds prefixes, only withdrawals are encoded in
 * this message.  Otherwise the first non-empty bucket of the bucket queue
 * is encoded (its attributes followed by as many of its prefixes as fit);
 * empty buckets met on the way are removed and freed, and buckets whose
 * attributes do not fit are flushed with an error logged.
 *
 * Returns a pointer just past the written body, or NULL when neither
 * withdrawals nor reachable prefixes were encoded.
 */
346: static byte *
347: bgp_create_update(struct bgp_conn *conn, byte *buf)
348: {
349: struct bgp_proto *p = conn->bgp;
350: struct bgp_bucket *buck;
/* Space for the body minus 4 bytes -- apparently the two 2-byte length fields written below */
351: int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
352: byte *w;
353: int wd_size = 0;
354: int r_size = 0;
355: int a_size = 0;
356:
/* Withdrawn prefixes start right after the 2-byte withdrawn routes length */
357: w = buf+2;
358: if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
359: {
360: DBG("Withdrawn routes:\n");
361: wd_size = bgp_encode_prefixes(p, w, buck, remains);
362: w += wd_size;
363: remains -= wd_size;
364: }
365: put_u16(buf, wd_size);
366:
/* Reachable routes are only considered when no withdrawal was encoded */
367: if (!wd_size)
368: {
/* Walk the bucket queue from its head until a bucket is successfully encoded */
369: while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
370: {
371: if (EMPTY_LIST(buck->prefixes))
372: {
373: DBG("Deleting empty bucket %p\n", buck);
374: rem_node(&buck->send_node);
375: bgp_free_bucket(p, buck);
376: continue;
377: }
378:
379: DBG("Processing bucket %p\n", buck);
/* Attributes go after their 2-byte length; 1024 bytes are held back, presumably for prefixes */
380: a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024);
381:
382: if (a_size < 0)
383: {
384: log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
385: bgp_flush_prefixes(p, buck);
386: rem_node(&buck->send_node);
387: bgp_free_bucket(p, buck);
388: continue;
389: }
390:
391: put_u16(w, a_size);
392: w += a_size + 2;
393: r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
394: w += r_size;
395: break;
396: }
397: }
398: if (!a_size) /* Attributes not already encoded, write a zero attributes length */
399: {
400: put_u16(w, 0);
401: w += 2;
402: }
403: if (wd_size || r_size)
404: {
405: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
406: return w;
407: }
408: else
409: return NULL;
410: }
411:
412: static byte *
413: bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
414: {
415: struct bgp_proto *p = conn->bgp;
416: BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
417:
418: put_u32(buf, 0);
419: return buf+4;
420: }
421:
422: #else /* IPv6 version */
423:
424: static inline int
425: same_iface(struct bgp_proto *p, ip_addr *ip)
426: {
427: neighbor *n = neigh_find(&p->p, ip, 0);
428: return n && p->neigh && n->iface == p->neigh->iface;
429: }
430:
/*
 * IPv6 variant: write the body of one UPDATE message to @buf.
 *
 * The withdrawn routes length at @buf is always zero; prefixes travel in
 * MP_UNREACH_NLRI / MP_REACH_NLRI attributes instead.  The 2-byte path
 * attributes length is stored at @buf+2 and the attributes start at @buf+4.
 *
 * If the withdraw bucket holds prefixes, a single MP_UNREACH_NLRI attribute
 * is built.  Otherwise the first usable bucket of the bucket queue is
 * encoded: its ordinary attributes first, then an MP_REACH_NLRI attribute
 * carrying the next hop(s) and as many prefixes as fit.  Temporary
 * attributes are allocated from bgp_linpool, which is flushed before
 * returning.
 *
 * Returns a pointer just past the written body, or NULL when no attribute
 * was written.
 */
431: static byte *
432: bgp_create_update(struct bgp_conn *conn, byte *buf)
433: {
434: struct bgp_proto *p = conn->bgp;
435: struct bgp_bucket *buck;
436: int size, second, rem_stored;
437: int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
438: byte *w, *w_stored, *tmp, *tstart;
439: ip_addr *ipp, ip, ip_ll;
440: ea_list *ea;
441: eattr *nh;
442:
/* Withdrawn routes length is always zero here */
443: put_u16(buf, 0);
444: w = buf+4;
445:
446: if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
447: {
448: DBG("Withdrawn routes:\n");
449: tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
/* AFI (2 bytes) and SAFI (1 byte) precede the withdrawn prefixes */
450: *tmp++ = 0;
451: *tmp++ = BGP_AF_IPV6;
452: *tmp++ = 1;
453: ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
454: size = bgp_encode_attrs(p, w, ea, remains);
455: ASSERT(size >= 0);
456: w += size;
457: remains -= size;
458: }
459: else
460: {
/* Walk the bucket queue from its head until a bucket is successfully encoded */
461: while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
462: {
463: if (EMPTY_LIST(buck->prefixes))
464: {
465: DBG("Deleting empty bucket %p\n", buck);
466: rem_node(&buck->send_node);
467: bgp_free_bucket(p, buck);
468: continue;
469: }
470:
471: DBG("Processing bucket %p\n", buck);
/* Remember the write position so the MLL_DROP case can roll back this bucket's attributes */
472: rem_stored = remains;
473: w_stored = w;
474:
475: size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024);
476: if (size < 0)
477: {
478: log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
479: bgp_flush_prefixes(p, buck);
480: rem_node(&buck->send_node);
481: bgp_free_bucket(p, buck);
482: continue;
483: }
484: w += size;
485: remains -= size;
486:
487: /* We have two addresses here in NEXT_HOP eattr. Really.
488: Unless NEXT_HOP was modified by filter */
489: nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
490: ASSERT(nh);
491: second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
492: ipp = (ip_addr *) nh->u.ptr->data;
493: ip = ipp[0];
494: ip_ll = IPA_NONE;
495:
496: if (ipa_equal(ip, p->source_addr))
497: ip_ll = p->local_link;
498: else
499: {
500: /* If we send a route with a 'third party' next hop located
501: * on the same interface, we should also send a link-local
502: * next hop address. We use the received one (stored in the
503: * other part of BA_NEXT_HOP eattr). If we didn't receive
504: * it (for example it is a static route), we can't use a
505: * 'third party' next hop and we have to use the local IP address
506: * as next hop. Sending the original next hop address without
507: * a link-local address seems to be a natural way to solve that
508: * problem, but it is contrary to RFC 2545 and Quagga does not
509: * accept such routes.
510: *
511: * There are two cases, either we have global IP, or
512: * IPA_NONE if the neighbor is link-local. For IPA_NONE,
513: * we suppose it is on the same iface, see bgp_update_attrs().
514: */
515:
516: if (ipa_zero(ip) || same_iface(p, &ip))
517: {
518: if (second && ipa_nonzero(ipp[1]))
519: ip_ll = ipp[1];
520: else
521: {
522: switch (p->cf->missing_lladdr)
523: {
524: case MLL_SELF:
525: ip = p->source_addr;
526: ip_ll = p->local_link;
527: break;
528: case MLL_DROP:
529: log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
530: w = w_stored;
531: remains = rem_stored;
532: bgp_flush_prefixes(p, buck);
533: rem_node(&buck->send_node);
534: bgp_free_bucket(p, buck);
535: continue;
536: case MLL_IGNORE:
537: break;
538: }
539: }
540: }
541: }
542:
543: tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
/* AFI (2 bytes) and SAFI (1 byte) */
544: *tmp++ = 0;
545: *tmp++ = BGP_AF_IPV6;
546: *tmp++ = 1;
547:
/* A link-local address is never sent in the first (global) next hop slot */
548: if (ipa_is_link_local(ip))
549: ip = IPA_NONE;
550:
/* Next hop length is 32 (global + link-local) or 16 (global only) */
551: if (ipa_nonzero(ip_ll))
552: {
553: *tmp++ = 32;
554: ipa_hton(ip);
555: memcpy(tmp, &ip, 16);
556: ipa_hton(ip_ll);
557: memcpy(tmp+16, &ip_ll, 16);
558: tmp += 32;
559: }
560: else
561: {
562: *tmp++ = 16;
563: ipa_hton(ip);
564: memcpy(tmp, &ip, 16);
565: tmp += 16;
566: }
567:
568: *tmp++ = 0; /* No SNPA information */
569: tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
570: ea->attrs[0].u.ptr->length = tmp - tstart;
571: size = bgp_encode_attrs(p, w, ea, remains);
572: ASSERT(size >= 0);
573: w += size;
574: break;
575: }
576: }
577:
/* Total length of all attributes written after the 4-byte prologue */
578: size = w - (buf+4);
579: put_u16(buf+2, size);
580: lp_flush(bgp_linpool);
581: if (size)
582: {
583: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
584: return w;
585: }
586: else
587: return NULL;
588: }
589:
590: static byte *
591: bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
592: {
593: struct bgp_proto *p = conn->bgp;
594: BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
595:
596: put_u16(buf+0, 0);
597: put_u16(buf+2, 6); /* length 4-9 */
598: buf += 4;
599:
600: /* Empty MP_UNREACH_NLRI atribute */
601: *buf++ = BAF_OPTIONAL;
602: *buf++ = BA_MP_UNREACH_NLRI;
603: *buf++ = 3; /* Length 7-9 */
604: *buf++ = 0; /* AFI */
605: *buf++ = BGP_AF_IPV6;
606: *buf++ = 1; /* SAFI */
607: return buf;
608: }
609:
610: #endif
611:
612: static inline byte *
613: bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
614: {
615: struct bgp_proto *p = conn->bgp;
616: BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
617:
618: /* Original original route refresh request, RFC 2918 */
619: *buf++ = 0;
620: *buf++ = BGP_AF;
621: *buf++ = BGP_RR_REQUEST;
622: *buf++ = 1; /* SAFI */
623: return buf;
624: }
625:
626: static inline byte *
627: bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf)
628: {
629: struct bgp_proto *p = conn->bgp;
630: BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
631:
632: /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
633: *buf++ = 0;
634: *buf++ = BGP_AF;
635: *buf++ = BGP_RR_BEGIN;
636: *buf++ = 1; /* SAFI */
637: return buf;
638: }
639:
640: static inline byte *
641: bgp_create_end_refresh(struct bgp_conn *conn, byte *buf)
642: {
643: struct bgp_proto *p = conn->bgp;
644: BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
645:
646: /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
647: *buf++ = 0;
648: *buf++ = BGP_AF;
649: *buf++ = BGP_RR_END;
650: *buf++ = 1; /* SAFI */
651: return buf;
652: }
653:
654:
655: static void
656: bgp_create_header(byte *buf, uint len, uint type)
657: {
658: memset(buf, 0xff, 16); /* Marker */
659: put_u16(buf+16, len);
660: buf[18] = type;
661: }
662:
663: /**
664: * bgp_fire_tx - transmit packets
665: * @conn: connection
666: *
667: * Whenever the transmit buffers of the underlying TCP connection
668: * are free and we have any packets queued for sending, the socket functions
669: * call bgp_fire_tx() which takes care of selecting the highest priority packet
670: * queued (Notification > Keepalive > Open > Route-Refresh > Begin-of-RR >
671: * Update), assembling its header and body and sending it to the connection.
672: *
 * A pending scheduled close takes precedence over everything: the
 * connection enters idle state and nothing is sent. When no UPDATE can be
 * built, an End-of-RIB or End-of-RR marker is sent instead if the feed
 * state asks for it.
 *
 * Returns 0 when nothing was sent (no socket, connection closed, nothing
 * queued), otherwise the result of sk_send().
 */
673: static int
674: bgp_fire_tx(struct bgp_conn *conn)
675: {
676: struct bgp_proto *p = conn->bgp;
677: uint s = conn->packets_to_send;
678: sock *sk = conn->sk;
679: byte *buf, *pkt, *end;
680: int type;
681:
/* Without a socket nothing can be sent; forget everything that was queued */
682: if (!sk)
683: {
684: conn->packets_to_send = 0;
685: return 0;
686: }
/* The message body is built directly in the socket TX buffer, after room for the header */
687: buf = sk->tbuf;
688: pkt = buf + BGP_HEADER_LENGTH;
689:
690: if (s & (1 << PKT_SCHEDULE_CLOSE))
691: {
692: /* We can finally close connection and enter idle state */
693: bgp_conn_enter_idle_state(conn);
694: return 0;
695: }
696: if (s & (1 << PKT_NOTIFICATION))
697: {
/* A notification discards everything else queued and schedules the close */
698: s = 1 << PKT_SCHEDULE_CLOSE;
699: type = PKT_NOTIFICATION;
700: end = bgp_create_notification(conn, pkt);
701: }
702: else if (s & (1 << PKT_KEEPALIVE))
703: {
704: s &= ~(1 << PKT_KEEPALIVE);
705: type = PKT_KEEPALIVE;
706: end = pkt; /* Keepalives carry no data */
707: BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
708: bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
709: }
710: else if (s & (1 << PKT_OPEN))
711: {
712: s &= ~(1 << PKT_OPEN);
713: type = PKT_OPEN;
714: end = bgp_create_open(conn, pkt);
715: }
716: else if (s & (1 << PKT_ROUTE_REFRESH))
717: {
718: s &= ~(1 << PKT_ROUTE_REFRESH);
719: type = PKT_ROUTE_REFRESH;
720: end = bgp_create_route_refresh(conn, pkt);
721: }
722: else if (s & (1 << PKT_BEGIN_REFRESH))
723: {
724: s &= ~(1 << PKT_BEGIN_REFRESH);
725: type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */
726: end = bgp_create_begin_refresh(conn, pkt);
727: }
728: else if (s & (1 << PKT_UPDATE))
729: {
/* The PKT_UPDATE bit stays set in s, so this branch is retried on the next call */
730: type = PKT_UPDATE;
731: end = bgp_create_update(conn, pkt);
732:
733: if (!end)
734: {
735: /* No update to send, perhaps we need to send End-of-RIB or EoRR */
736:
737: conn->packets_to_send = 0;
738:
739: if (p->feed_state == BFS_LOADED)
740: {
741: type = PKT_UPDATE;
742: end = bgp_create_end_mark(conn, pkt);
743: }
744:
745: else if (p->feed_state == BFS_REFRESHED)
746: {
747: type = PKT_ROUTE_REFRESH;
748: end = bgp_create_end_refresh(conn, pkt);
749: }
750:
751: else /* Really nothing to send */
752: return 0;
753:
754: p->feed_state = BFS_NONE;
755: }
756: }
757: else
758: return 0;
759:
/* Store the remaining queue, prepend the header and send header + body */
760: conn->packets_to_send = s;
761: bgp_create_header(buf, end - buf, type);
762: return sk_send(sk, end - buf);
763: }
764:
765: /**
766: * bgp_schedule_packet - schedule a packet for transmission
767: * @conn: connection
768: * @type: packet type
769: *
770: * Schedule a packet of type @type to be sent as soon as possible.
771: */
772: void
773: bgp_schedule_packet(struct bgp_conn *conn, int type)
774: {
775: DBG("BGP: Scheduling packet type %d\n", type);
776: conn->packets_to_send |= 1 << type;
777: if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev))
778: ev_schedule(conn->tx_ev);
779: }
780:
781: void
782: bgp_kick_tx(void *vconn)
783: {
784: struct bgp_conn *conn = vconn;
785:
786: DBG("BGP: kicking TX\n");
1.1.1.2 ! misho 787: uint max = 1024;
! 788: while (--max && (bgp_fire_tx(conn) > 0))
1.1 misho 789: ;
1.1.1.2 ! misho 790:
! 791: if (!max && !ev_active(conn->tx_ev))
! 792: ev_schedule(conn->tx_ev);
1.1 misho 793: }
794:
795: void
796: bgp_tx(sock *sk)
797: {
798: struct bgp_conn *conn = sk->data;
799:
800: DBG("BGP: TX hook\n");
1.1.1.2 ! misho 801: uint max = 1024;
! 802: while (--max && (bgp_fire_tx(conn) > 0))
1.1 misho 803: ;
1.1.1.2 ! misho 804:
! 805: if (!max && !ev_active(conn->tx_ev))
! 806: ev_schedule(conn->tx_ev);
1.1 misho 807: }
808:
809: /* Capability negotiation as per RFC 2842 */
810:
/*
 * Parse a list of capabilities and record what the peer supports in @conn.
 *
 * @opt points to the first capability (1-byte code, 1-byte data length,
 * data) and @len is the total length of the list. Known capabilities set
 * the corresponding conn->peer_* fields; unknown ones are skipped.
 * A truncated list or a known capability with an unexpected data length
 * is reported by bgp_error(conn, 2, 0, NULL, 0) and parsing stops.
 */
811: void
812: bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
813: {
814: // struct bgp_proto *p = conn->bgp;
815: int i, cl;
816:
817: while (len > 0)
818: {
/* The 2-byte capability header and its data must fit into what is left */
819: if (len < 2 || len < 2 + opt[1])
820: goto err;
821:
822: cl = opt[1];
823:
824: switch (opt[0])
825: {
826: case 2: /* Route refresh capability, RFC 2918 */
827: if (cl != 0)
828: goto err;
829: conn->peer_refresh_support = 1;
830: break;
831:
832: case 6: /* Extended message length capability, draft */
833: if (cl != 0)
834: goto err;
835: conn->peer_ext_messages_support = 1;
836: break;
837:
838: case 64: /* Graceful restart capability, RFC 4724 */
/* 2 bytes of flags/time followed by any number of 4-byte AFI/SAFI entries */
839: if (cl % 4 != 2)
840: goto err;
841: conn->peer_gr_aware = 1;
842: conn->peer_gr_able = 0;
843: conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
844: conn->peer_gr_flags = opt[2] & 0xf0;
845: conn->peer_gr_aflags = 0;
846: for (i = 2; i < cl; i += 4)
847: if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
848: {
849: conn->peer_gr_able = 1;
850: conn->peer_gr_aflags = opt[2+i+3];
851: }
852: break;
853:
854: case 65: /* AS4 capability, RFC 4893 */
855: if (cl != 4)
856: goto err;
857: conn->peer_as4_support = 1;
/* The 4-byte AS replaces the advertised AS only when AS4 is enabled locally */
858: if (conn->bgp->cf->enable_as4)
859: conn->advertised_as = get_u32(opt + 2);
860: break;
861:
1.1.1.2 ! misho 862: case 69: /* ADD-PATH capability, RFC 7911 */
1.1 misho 863: if (cl % 4)
864: goto err;
865: for (i = 0; i < cl; i += 4)
866: if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
867: conn->peer_add_path = opt[2+i+3];
868: if (conn->peer_add_path > ADD_PATH_FULL)
869: goto err;
870: break;
871:
872: case 70: /* Enhanced route refresh capability, RFC 7313 */
873: if (cl != 0)
874: goto err;
875: conn->peer_enhanced_refresh_support = 1;
876: break;
877:
1.1.1.2 ! misho 878: case 71: /* Long-lived graceful restart capability, RFC draft */
/* Any number of 7-byte entries: AFI (2), SAFI (1), flags (1), time (3) */
! 879: if (cl % 7)
! 880: goto err;
! 881: conn->peer_llgr_aware = 1;
! 882: conn->peer_llgr_able = 0;
! 883: conn->peer_llgr_time = 0;
! 884: conn->peer_llgr_aflags = 0;
! 885: for (i = 0; i < cl; i += 7)
! 886: if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
! 887: {
! 888: conn->peer_llgr_able = 1;
/* Flags and time are read as one 32-bit value; the low 24 bits are the time */
! 889: conn->peer_llgr_time = get_u32(opt + 2+i+3) & 0xffffff;
! 890: conn->peer_llgr_aflags = opt[2+i+3];
! 891: }
! 892: break;
! 893:
1.1 misho 894: /* We can safely ignore all other capabilities */
895: }
896: len -= 2 + cl;
897: opt += 2 + cl;
898: }
1.1.1.2 ! misho 899:
! 900: /* The LLGR capability must be advertised together with the GR capability,
! 901: otherwise it must be disregarded */
! 902: if (!conn->peer_gr_aware && conn->peer_llgr_aware)
! 903: {
! 904: conn->peer_llgr_aware = 0;
! 905: conn->peer_llgr_able = 0;
! 906: conn->peer_llgr_time = 0;
! 907: conn->peer_llgr_aflags = 0;
! 908: }
! 909:
1.1 misho 910: return;
911:
912: err:
913: bgp_error(conn, 2, 0, NULL, 0);
914: return;
915: }
916:
917: static int
918: bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
919: {
920: struct bgp_proto *p = conn->bgp;
921: int ol;
922:
923: while (len > 0)
924: {
925: if (len < 2 || len < 2 + opt[1])
926: { bgp_error(conn, 2, 0, NULL, 0); return 0; }
927: #ifdef LOCAL_DEBUG
928: {
929: int i;
930: DBG("\tOption %02x:", opt[0]);
931: for(i=0; i<opt[1]; i++)
932: DBG(" %02x", opt[2+i]);
933: DBG("\n");
934: }
935: #endif
936:
937: ol = opt[1];
938: switch (opt[0])
939: {
940: case 2:
941: if (conn->start_state == BSS_CONNECT_NOCAP)
942: BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
943: else
944: bgp_parse_capabilities(conn, opt + 2, ol);
945: break;
946:
947: default:
948: /*
949: * BGP specs don't tell us to send which option
950: * we didn't recognize, but it's common practice
951: * to do so. Also, capability negotiation with
952: * Cisco routers doesn't work without that.
953: */
954: bgp_error(conn, 2, 4, opt, ol);
955: return 0;
956: }
957: len -= 2 + ol;
958: opt += 2 + ol;
959: }
960: return 0;
961: }
962:
/*
 * Process a received OPEN message.
 *
 * @pkt points to the whole message including the BGP header (the length
 * field is at pkt+16, the OPEN body starts at pkt+19) and @len is its
 * total length.
 *
 * Steps: check that the connection is in OpenSent state, validate the
 * message length, version, hold time, router ID and AS number, parse the
 * optional parameters, resolve a possible collision with the other
 * connection of the same protocol instance, and finally take over the
 * negotiated session parameters, schedule a KEEPALIVE, start the hold
 * timer and enter OpenConfirm state. Every failed check is reported via
 * bgp_error() and ends processing.
 */
963: static void
964: bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
965: {
966: struct bgp_conn *other;
967: struct bgp_proto *p = conn->bgp;
968: unsigned hold;
969: u16 base_as;
970: u32 id;
971:
972: /* Check state */
973: if (conn->state != BS_OPENSENT)
974: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
975:
976: /* Check message contents */
/* pkt[28] is the optional parameters length; the total must be exactly 29 + that */
977: if (len < 29 || len != 29U + pkt[28])
978: { bgp_error(conn, 1, 2, pkt+16, 2); return; }
979: if (pkt[19] != BGP_VERSION)
980: { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
/* advertised_as may later be replaced by the 4-byte AS from the AS4 capability */
981: conn->advertised_as = base_as = get_u16(pkt+20);
982: hold = get_u16(pkt+22);
983: id = get_u32(pkt+24);
984: BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
985:
986: if (bgp_parse_options(conn, pkt+29, pkt[28]))
987: return;
988:
/* A hold time of 0 is accepted, 1 and 2 are rejected */
989: if (hold > 0 && hold < 3)
990: { bgp_error(conn, 2, 6, pkt+22, 2); return; }
991:
992: /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
993: if (!id || (p->is_internal && id == p->local_id))
994: { bgp_error(conn, 2, 3, pkt+24, -4); return; }
995:
996: if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
997: log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
998:
999: if (conn->advertised_as != p->remote_as)
1000: {
/* Report the offending AS in 4-byte form if the peer announced the AS4 capability, else the 2-byte field */
1001: if (conn->peer_as4_support)
1002: {
1003: u32 val = htonl(conn->advertised_as);
1004: bgp_error(conn, 2, 2, (byte *) &val, 4);
1005: }
1006: else
1007: bgp_error(conn, 2, 2, pkt+20, 2);
1008:
1009: return;
1010: }
1011:
1012: /* Check the other connection */
1013: other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
1014: switch (other->state)
1015: {
1016: case BS_CONNECT:
1017: case BS_ACTIVE:
1018: /* Stop outgoing connection attempts */
1019: bgp_conn_enter_idle_state(other);
1020: break;
1021:
1022: case BS_IDLE:
1023: case BS_OPENSENT:
1024: case BS_CLOSE:
1025: break;
1026:
1027: case BS_OPENCONFIRM:
1028: /*
1029: * Description of collision detection rules in RFC 4271 is confusing and
1030: * contradictory, but it is essentially:
1031: *
1032: * 1. Router with higher ID is dominant
1033: * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
1034: * 3. When both connections are in OpenConfirm state, one initiated by
1035: * the dominant router is kept.
1036: *
1037: * The first line in the expression below evaluates whether the neighbor
1038: * is dominant, the second line whether the new connection was initiated
1039: * by the neighbor. If both are true (or both are false), we keep the new
1040: * connection, otherwise we keep the old one.
1041: */
1042: if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as)))
1043: == (conn == &p->incoming_conn))
1044: {
1045: /* Should close the other connection */
1046: BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
1047: bgp_error(other, 6, 7, NULL, 0);
1048: break;
1049: }
1050: /* Fall thru */
1051: case BS_ESTABLISHED:
1052: /* Should close this connection */
1053: BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
1054: bgp_error(conn, 6, 7, NULL, 0);
1055: return;
1056: default:
1057: bug("bgp_rx_open: Unknown state");
1058: }
1059:
1060: /* Update our local variables */
1061: conn->hold_time = MIN(hold, p->cf->hold_time);
/* Configured keepalive time wins; otherwise one third of the hold time */
1062: conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
1063: p->remote_id = id;
1064: p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
/* ADD-PATH is used in a direction only when we enabled it and the peer announced the opposite role */
1065: p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
1066: p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
1.1.1.2 ! misho 1067: p->gr_ready = (p->cf->gr_mode && conn->peer_gr_able) ||
! 1068: (p->cf->llgr_mode && conn->peer_llgr_able);
1.1 misho 1069: p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support;
1070:
1.1.1.2 ! misho 1071: /* Update RA mode */
1.1 misho 1072: if (p->add_path_tx)
1073: p->p.accept_ra_types = RA_ANY;
1.1.1.2 ! misho 1074: else if (p->cf->secondary)
! 1075: p->p.accept_ra_types = RA_ACCEPTED;
! 1076: else
! 1077: p->p.accept_ra_types = RA_OPTIMAL;
1.1 misho 1078:
1079: DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
1080:
1081: bgp_schedule_packet(conn, PKT_KEEPALIVE);
1082: bgp_start_timer(conn->hold_timer, conn->hold_time);
1083: bgp_conn_enter_openconfirm_state(conn);
1084: }
1085:
1086:
1087: static inline void
1088: bgp_rx_end_mark(struct bgp_proto *p)
1089: {
1090: BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
1091:
1092: if (p->load_state == BFS_LOADING)
1093: p->load_state = BFS_NONE;
1094:
1095: if (p->p.gr_recovery)
1096: proto_graceful_restart_unlock(&p->p);
1097:
1098: if (p->gr_active)
1099: bgp_graceful_restart_done(p);
1100: }
1101:
1102:
/*
 * DECODE_PREFIX(pp, ll) - decode one prefix from a received message
 *
 * @pp is the read pointer and @ll the number of bytes left; both are
 * advanced / decreased past the decoded prefix. When p->add_path_rx is set,
 * a 4-byte path ID is read first. Then follows the prefix length in bits
 * and just as many address bytes as that length requires; bits beyond the
 * prefix length are masked off.
 *
 * The macro relies on the following names in the caller's scope: p (the
 * BGP protocol instance), path_id, prefix and pxlen (outputs), err (set to
 * 1 when data are truncated, to 10 when the prefix length exceeds
 * BITS_PER_IP_ADDRESS) and a label done, which is jumped to on error.
 */
1103: #define DECODE_PREFIX(pp, ll) do { \
1104: if (p->add_path_rx) \
1105: { \
1106: if (ll < 5) { err=1; goto done; } \
1107: path_id = get_u32(pp); \
1108: pp += 4; \
1109: ll -= 4; \
1110: } \
1111: int b = *pp++; \
1112: int q; \
1113: ll--; \
1114: if (b > BITS_PER_IP_ADDRESS) { err=10; goto done; } \
1115: q = (b+7) / 8; \
1116: if (ll < q) { err=1; goto done; } \
1117: memcpy(&prefix, pp, q); \
1118: pp += q; \
1119: ll -= q; \
1120: ipa_ntoh(prefix); \
1121: prefix = ipa_and(prefix, ipa_mkmask(b)); \
1122: pxlen = b; \
1123: } while (0)
1124:
1125:
1126: static inline void
1127: bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
1128: u32 path_id, u32 *last_id, struct rte_src **src,
1129: rta *a0, rta **a)
1130: {
1131: if (path_id != *last_id)
1132: {
1133: *src = rt_get_source(&p->p, path_id);
1134: *last_id = path_id;
1135:
1136: if (*a)
1137: {
1138: rta_free(*a);
1139: *a = NULL;
1140: }
1141: }
1142:
1143: /* Prepare cached route attributes */
1144: if (!*a)
1145: {
1146: a0->src = *src;
1147:
1148: /* Workaround for rta_lookup() breaking eattrs */
1149: ea_list *ea = a0->eattrs;
1150: *a = rta_lookup(a0);
1151: a0->eattrs = ea;
1152: }
1153:
1154: net *n = net_get(p->p.table, prefix, pxlen);
1155: rte *e = rte_get_temp(rta_clone(*a));
1156: e->net = n;
1157: e->pflags = 0;
1158: e->u.bgp.suppressed = 0;
1.1.1.2 ! misho 1159: e->u.bgp.stale = -1;
1.1 misho 1160: rte_update2(p->p.main_ahook, n, e, *src);
1161: }
1162:
1163: static inline void
1164: bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
1165: u32 path_id, u32 *last_id, struct rte_src **src)
1166: {
1167: if (path_id != *last_id)
1168: {
1169: *src = rt_find_source(&p->p, path_id);
1170: *last_id = path_id;
1171: }
1172:
1173: net *n = net_find(p->p.table, prefix, pxlen);
1174: rte_update2( p->p.main_ahook, n, NULL, *src);
1175: }
1176:
1177: static inline int
1178: bgp_set_next_hop(struct bgp_proto *p, rta *a)
1179: {
1180: struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
1181: ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
1182:
1183: #ifdef IPV6
1184: int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
1185:
1186: /* First address should not be link-local, but may be zero in direct mode */
1187: if (ipa_is_link_local(*nexthop))
1188: *nexthop = IPA_NONE;
1189: #else
1190: int second = 0;
1191: #endif
1192:
1193: if (p->cf->gw_mode == GW_DIRECT)
1194: {
1195: neighbor *ng = NULL;
1196:
1197: if (ipa_nonzero(*nexthop))
1198: ng = neigh_find(&p->p, nexthop, 0);
1199: else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
1200: ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0);
1201:
1202: /* Fallback */
1203: if (!ng)
1204: ng = p->neigh;
1205:
1206: if (ng->scope == SCOPE_HOST)
1207: return 0;
1208:
1209: a->dest = RTD_ROUTER;
1210: a->gw = ng->addr;
1211: a->iface = ng->iface;
1212: a->hostentry = NULL;
1213: a->igp_metric = 0;
1214: }
1215: else /* GW_RECURSIVE */
1216: {
1217: if (ipa_zero(*nexthop))
1218: return 0;
1219:
1220: rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second);
1221: }
1222:
1223: return 1;
1224: }
1225:
1226: #ifndef IPV6 /* IPv4 version */
1227:
1228: static void
1229: bgp_do_rx_update(struct bgp_conn *conn,
1230: byte *withdrawn, int withdrawn_len,
1231: byte *nlri, int nlri_len,
1232: byte *attrs, int attr_len)
1233: {
1234: struct bgp_proto *p = conn->bgp;
1235: struct rte_src *src = p->p.main_source;
1236: rta *a0, *a = NULL;
1237: ip_addr prefix;
1238: int pxlen, err = 0;
1239: u32 path_id = 0;
1240: u32 last_id = 0;
1241:
1242: /* Check for End-of-RIB marker */
1243: if (!withdrawn_len && !attr_len && !nlri_len)
1244: {
1245: bgp_rx_end_mark(p);
1246: return;
1247: }
1248:
1249: /* Withdraw routes */
1250: while (withdrawn_len)
1251: {
1252: DECODE_PREFIX(withdrawn, withdrawn_len);
1253: DBG("Withdraw %I/%d\n", prefix, pxlen);
1254:
1255: bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1256: }
1257:
1258: if (!attr_len && !nlri_len) /* shortcut */
1259: return;
1260:
1261: a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
1262:
1263: if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1264: return;
1265:
1266: if (a0 && nlri_len && !bgp_set_next_hop(p, a0))
1267: a0 = NULL;
1268:
1269: last_id = 0;
1270: src = p->p.main_source;
1271:
1272: while (nlri_len)
1273: {
1274: DECODE_PREFIX(nlri, nlri_len);
1275: DBG("Add %I/%d\n", prefix, pxlen);
1276:
1277: if (a0)
1278: bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1279: else /* Forced withdraw as a result of soft error */
1280: bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1281: }
1282:
1283: done:
1284: if (a)
1285: rta_free(a);
1286:
1287: if (err)
1288: bgp_error(conn, 3, err, NULL, 0);
1289:
1290: return;
1291: }
1292:
1293: #else /* IPv6 version */
1294:
/*
 * DO_NLRI(name) - begin processing of the stored multiprotocol attribute
 * p->name##_start / p->name##_len.
 *
 * Uses x, len, len0, af, err and the label done from the enclosing
 * function. A non-empty attribute shorter than 3 bytes sets err to 9 and
 * jumps to done; otherwise the 2-byte AF is read and the first 3 bytes are
 * skipped. An empty attribute yields af 0.
 *
 * The expansion ends with an open `if (af == BGP_AF_IPV6)`, so the macro
 * must be directly followed by the statement that handles IPv6 NLRI.
 */
#define DO_NLRI(name)						\
  x = p->name##_start;						\
  len = len0 = p->name##_len;					\
  if (!len)							\
    af = 0;							\
  else								\
  {								\
    if (len < 3) { err=9; goto done; }				\
    af = get_u16(x);						\
    x += 3;							\
    len -= 3;							\
    DBG("\tNLRI AF=%d sub=%d len=%d\n", af, x[-1], len);	\
  }								\
  if (af == BGP_AF_IPV6)
1309:
1310: static void
1311: bgp_attach_next_hop(rta *a0, byte *x)
1312: {
1313: ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
1314: memcpy(nh, x+1, 16);
1315: ipa_ntoh(nh[0]);
1316:
1317: /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
1318: if (*x == 32)
1319: {
1320: memcpy(nh+1, x+17, 16);
1321: ipa_ntoh(nh[1]);
1322: }
1323: else
1324: nh[1] = IPA_NONE;
1325: }
1326:
1327:
1328: static void
1329: bgp_do_rx_update(struct bgp_conn *conn,
1330: byte *withdrawn UNUSED, int withdrawn_len,
1331: byte *nlri UNUSED, int nlri_len,
1332: byte *attrs, int attr_len)
1333: {
1334: struct bgp_proto *p = conn->bgp;
1335: struct rte_src *src = p->p.main_source;
1336: byte *x;
1337: int len, len0;
1338: unsigned af;
1339: rta *a0, *a = NULL;
1340: ip_addr prefix;
1341: int pxlen, err = 0;
1342: u32 path_id = 0;
1343: u32 last_id = 0;
1344:
1345: p->mp_reach_len = 0;
1346: p->mp_unreach_len = 0;
1347: a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
1348:
1349: if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1350: return;
1351:
1352: /* Check for End-of-RIB marker */
1353: if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len &&
1354: (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
1355: {
1356: bgp_rx_end_mark(p);
1357: return;
1358: }
1359:
1360: DO_NLRI(mp_unreach)
1361: {
1362: while (len)
1363: {
1364: DECODE_PREFIX(x, len);
1365: DBG("Withdraw %I/%d\n", prefix, pxlen);
1366: bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1367: }
1368: }
1369:
1370: DO_NLRI(mp_reach)
1371: {
1372: /* Create fake NEXT_HOP attribute */
1373: if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
1374: { err = 9; goto done; }
1375:
1376: if (a0)
1377: bgp_attach_next_hop(a0, x);
1378:
1379: /* Also ignore one reserved byte */
1380: len -= *x + 2;
1381: x += *x + 2;
1382:
1383: if (a0 && ! bgp_set_next_hop(p, a0))
1384: a0 = NULL;
1385:
1386: last_id = 0;
1387: src = p->p.main_source;
1388:
1389: while (len)
1390: {
1391: DECODE_PREFIX(x, len);
1392: DBG("Add %I/%d\n", prefix, pxlen);
1393:
1394: if (a0)
1395: bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1396: else /* Forced withdraw as a result of soft error */
1397: bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1398: }
1399: }
1400:
1401: done:
1402: if (a)
1403: rta_free(a);
1404:
1405: if (err) /* Use subcode 9, not err */
1406: bgp_error(conn, 3, 9, NULL, 0);
1407:
1408: return;
1409: }
1410:
1411: #endif
1412:
1413: static void
1414: bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
1415: {
1416: struct bgp_proto *p = conn->bgp;
1417: byte *withdrawn, *attrs, *nlri;
1418: uint withdrawn_len, attr_len, nlri_len;
1419:
1420: BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
1421:
1422: /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1423: if (conn->state == BS_OPENCONFIRM)
1424: bgp_conn_enter_established_state(conn);
1425:
1426: if (conn->state != BS_ESTABLISHED)
1427: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1428: bgp_start_timer(conn->hold_timer, conn->hold_time);
1429:
1430: /* Find parts of the packet and check sizes */
1431: if (len < 23)
1432: {
1433: bgp_error(conn, 1, 2, pkt+16, 2);
1434: return;
1435: }
1436: withdrawn = pkt + 21;
1437: withdrawn_len = get_u16(pkt + 19);
1438: if (withdrawn_len + 23 > len)
1439: goto malformed;
1440: attrs = withdrawn + withdrawn_len + 2;
1441: attr_len = get_u16(attrs - 2);
1442: if (withdrawn_len + attr_len + 23 > len)
1443: goto malformed;
1444: nlri = attrs + attr_len;
1445: nlri_len = len - withdrawn_len - attr_len - 23;
1446: if (!attr_len && nlri_len)
1447: goto malformed;
1448: DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
1449:
1450: lp_flush(bgp_linpool);
1451:
1452: bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
1453: return;
1454:
1455: malformed:
1456: bgp_error(conn, 3, 1, NULL, 0);
1457: }
1458:
1459: static struct {
1460: byte major, minor;
1461: byte *msg;
1462: } bgp_msg_table[] = {
1463: { 1, 0, "Invalid message header" },
1464: { 1, 1, "Connection not synchronized" },
1465: { 1, 2, "Bad message length" },
1466: { 1, 3, "Bad message type" },
1467: { 2, 0, "Invalid OPEN message" },
1468: { 2, 1, "Unsupported version number" },
1469: { 2, 2, "Bad peer AS" },
1470: { 2, 3, "Bad BGP identifier" },
1471: { 2, 4, "Unsupported optional parameter" },
1472: { 2, 5, "Authentication failure" },
1473: { 2, 6, "Unacceptable hold time" },
1474: { 2, 7, "Required capability missing" }, /* [RFC5492] */
1475: { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1476: { 3, 0, "Invalid UPDATE message" },
1477: { 3, 1, "Malformed attribute list" },
1478: { 3, 2, "Unrecognized well-known attribute" },
1479: { 3, 3, "Missing mandatory attribute" },
1480: { 3, 4, "Invalid attribute flags" },
1481: { 3, 5, "Invalid attribute length" },
1482: { 3, 6, "Invalid ORIGIN attribute" },
1483: { 3, 7, "AS routing loop" }, /* Deprecated */
1484: { 3, 8, "Invalid NEXT_HOP attribute" },
1485: { 3, 9, "Optional attribute error" },
1486: { 3, 10, "Invalid network field" },
1487: { 3, 11, "Malformed AS_PATH" },
1488: { 4, 0, "Hold timer expired" },
1489: { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1490: { 5, 1, "Unexpected message in OpenSent state" },
1491: { 5, 2, "Unexpected message in OpenConfirm state" },
1492: { 5, 3, "Unexpected message in Established state" },
1493: { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1494: { 6, 1, "Maximum number of prefixes reached" },
1495: { 6, 2, "Administrative shutdown" },
1496: { 6, 3, "Peer de-configured" },
1497: { 6, 4, "Administrative reset" },
1498: { 6, 5, "Connection rejected" },
1499: { 6, 6, "Other configuration change" },
1500: { 6, 7, "Connection collision resolution" },
1501: { 6, 8, "Out of Resources" },
1502: { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
1503: { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
1504: };
1505:
1506: /**
1507: * bgp_error_dsc - return BGP error description
1508: * @code: BGP error code
1509: * @subcode: BGP error subcode
1510: *
1511: * bgp_error_dsc() returns error description for BGP errors
1512: * which might be static string or given temporary buffer.
1513: */
1514: const char *
1515: bgp_error_dsc(unsigned code, unsigned subcode)
1516: {
1517: static char buff[32];
1518: unsigned i;
1519: for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1520: if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1521: {
1522: return bgp_msg_table[i].msg;
1523: }
1524:
1525: bsprintf(buff, "Unknown error %d.%d", code, subcode);
1526: return buff;
1527: }
1528:
1.1.1.2 ! misho 1529: /* RFC 8203 - shutdown communication message */
! 1530: static int
! 1531: bgp_handle_message(struct bgp_proto *p, byte *data, uint len, byte **bp)
! 1532: {
! 1533: byte *msg = data + 1;
! 1534: uint msg_len = data[0];
! 1535: uint i;
! 1536:
! 1537: /* Handle zero length message */
! 1538: if (msg_len == 0)
! 1539: return 1;
! 1540:
! 1541: /* Handle proper message */
! 1542: if (msg_len + 1 > len)
! 1543: return 0;
! 1544:
! 1545: /* Some elementary cleanup */
! 1546: for (i = 0; i < msg_len; i++)
! 1547: if (msg[i] < ' ')
! 1548: msg[i] = ' ';
! 1549:
! 1550: proto_set_message(&p->p, msg, msg_len);
! 1551: *bp += bsprintf(*bp, ": \"%s\"", p->p.message);
! 1552: return 1;
! 1553: }
! 1554:
1.1 misho 1555: void
1556: bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
1557: {
1.1.1.2 ! misho 1558: byte argbuf[256+16], *t = argbuf;
1.1 misho 1559: unsigned i;
1560:
1561: /* Don't report Cease messages generated by myself */
1562: if (code == 6 && class == BE_BGP_TX)
1563: return;
1564:
1.1.1.2 ! misho 1565: /* Reset shutdown message */
! 1566: if ((code == 6) && ((subcode == 2) || (subcode == 4)))
! 1567: proto_set_message(&p->p, NULL, 0);
! 1568:
1.1 misho 1569: if (len)
1570: {
1.1.1.2 ! misho 1571: /* Bad peer AS - we would like to print the AS */
1.1 misho 1572: if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1573: {
1.1.1.2 ! misho 1574: t += bsprintf(t, ": %u", (len == 2) ? get_u16(data) : get_u32(data));
1.1 misho 1575: goto done;
1576: }
1.1.1.2 ! misho 1577:
! 1578: /* RFC 8203 - shutdown communication */
! 1579: if (((code == 6) && ((subcode == 2) || (subcode == 4))))
! 1580: if (bgp_handle_message(p, data, len, &t))
! 1581: goto done;
! 1582:
! 1583: *t++ = ':';
! 1584: *t++ = ' ';
1.1 misho 1585: if (len > 16)
1586: len = 16;
1587: for (i=0; i<len; i++)
1588: t += bsprintf(t, "%02x", data[i]);
1589: }
1.1.1.2 ! misho 1590:
! 1591: done:
1.1 misho 1592: *t = 0;
1.1.1.2 ! misho 1593: const byte *dsc = bgp_error_dsc(code, subcode);
! 1594: log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, dsc, argbuf);
1.1 misho 1595: }
1596:
1597: static void
1598: bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
1599: {
1600: struct bgp_proto *p = conn->bgp;
1601: if (len < 21)
1602: {
1603: bgp_error(conn, 1, 2, pkt+16, 2);
1604: return;
1605: }
1606:
1607: unsigned code = pkt[19];
1608: unsigned subcode = pkt[20];
1609: int err = (code != 6);
1610:
1611: bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1612: bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1613:
1614: #ifndef IPV6
1615: if ((code == 2) && ((subcode == 4) || (subcode == 7))
1616: /* Error related to capability:
1617: * 4 - Peer does not support capabilities at all.
1618: * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
1619: */
1620: && (p->cf->capabilities == 2)
1621: /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1622: && (conn->start_state == BSS_CONNECT)
1623: /* Failed connection attempt have used capabilities */
1624: && (p->cf->remote_as <= 0xFFFF))
1625: /* Not possible with disabled capabilities */
1626: {
1627: /* We try connect without capabilities */
1628: log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
1629: p->start_state = BSS_CONNECT_NOCAP;
1630: err = 0;
1631: }
1632: #endif
1633:
1634: bgp_conn_enter_close_state(conn);
1635: bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1636:
1637: if (err)
1638: {
1639: bgp_update_startup_delay(p);
1.1.1.2 ! misho 1640: bgp_stop(p, 0, NULL, 0);
! 1641: }
! 1642: else
! 1643: {
! 1644: uint subcode_bit = 1 << ((subcode <= 8) ? subcode : 0);
! 1645: if (p->cf->disable_after_cease & subcode_bit)
! 1646: {
! 1647: log(L_INFO "%s: Disabled after Cease notification", p->p.name);
! 1648: p->startup_delay = 0;
! 1649: p->p.disabled = 1;
! 1650: }
1.1 misho 1651: }
1652: }
1653:
1654: static void
1655: bgp_rx_keepalive(struct bgp_conn *conn)
1656: {
1657: struct bgp_proto *p = conn->bgp;
1658:
1659: BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1660: bgp_start_timer(conn->hold_timer, conn->hold_time);
1661: switch (conn->state)
1662: {
1663: case BS_OPENCONFIRM:
1664: bgp_conn_enter_established_state(conn);
1665: break;
1666: case BS_ESTABLISHED:
1667: break;
1668: default:
1669: bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
1670: }
1671: }
1672:
1673: static void
1674: bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
1675: {
1676: struct bgp_proto *p = conn->bgp;
1677:
1678: if (conn->state != BS_ESTABLISHED)
1679: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1680:
1681: if (!p->cf->enable_refresh)
1682: { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1683:
1684: if (len < (BGP_HEADER_LENGTH + 4))
1685: { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1686:
1687: if (len > (BGP_HEADER_LENGTH + 4))
1688: { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
1689:
1690: /* FIXME - we ignore AFI/SAFI values, as we support
1691: just one value and even an error code for an invalid
1692: request is not defined */
1693:
1694: /* RFC 7313 redefined reserved field as RR message subtype */
1695: uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST;
1696:
1697: switch (subtype)
1698: {
1699: case BGP_RR_REQUEST:
1700: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1701: proto_request_feeding(&p->p);
1702: break;
1703:
1704: case BGP_RR_BEGIN:
1705: BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
1706: bgp_refresh_begin(p);
1707: break;
1708:
1709: case BGP_RR_END:
1710: BGP_TRACE(D_PACKETS, "Got END-OF-RR");
1711: bgp_refresh_end(p);
1712: break;
1713:
1714: default:
1715: log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
1716: p->p.name, subtype);
1717: break;
1718: }
1719: }
1720:
1721:
1722: /**
1723: * bgp_rx_packet - handle a received packet
1724: * @conn: BGP connection
1725: * @pkt: start of the packet
1726: * @len: packet size
1727: *
1728: * bgp_rx_packet() takes a newly received packet and calls the corresponding
1729: * packet handler according to the packet type.
1730: */
1731: static void
1732: bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1733: {
1734: byte type = pkt[18];
1735:
1736: DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
1737:
1738: if (conn->bgp->p.mrtdump & MD_MESSAGES)
1.1.1.2 ! misho 1739: bgp_dump_message(conn, pkt, len);
1.1 misho 1740:
1741: switch (type)
1742: {
1743: case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
1744: case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
1745: case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
1746: case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
1747: case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
1748: default: bgp_error(conn, 1, 3, pkt+18, 1);
1749: }
1750: }
1751:
1752: /**
1753: * bgp_rx - handle received data
1754: * @sk: socket
1755: * @size: amount of data received
1756: *
1757: * bgp_rx() is called by the socket layer whenever new data arrive from
1758: * the underlying TCP connection. It assembles the data fragments to packets,
1759: * checks their headers and framing and passes complete packets to
1760: * bgp_rx_packet().
1761: */
1762: int
1763: bgp_rx(sock *sk, uint size)
1764: {
1765: struct bgp_conn *conn = sk->data;
1766: struct bgp_proto *p = conn->bgp;
1767: byte *pkt_start = sk->rbuf;
1768: byte *end = pkt_start + size;
1769: unsigned i, len;
1770:
1771: DBG("BGP: RX hook: Got %d bytes\n", size);
1772: while (end >= pkt_start + BGP_HEADER_LENGTH)
1773: {
1774: if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1775: return 0;
1776: for(i=0; i<16; i++)
1777: if (pkt_start[i] != 0xff)
1778: {
1779: bgp_error(conn, 1, 1, NULL, 0);
1780: break;
1781: }
1782: len = get_u16(pkt_start+16);
1783: if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p))
1784: {
1785: bgp_error(conn, 1, 2, pkt_start+16, 2);
1786: break;
1787: }
1788: if (end < pkt_start + len)
1789: break;
1790: bgp_rx_packet(conn, pkt_start, len);
1791: pkt_start += len;
1792: }
1793: if (pkt_start != sk->rbuf)
1794: {
1795: memmove(sk->rbuf, pkt_start, end - pkt_start);
1796: sk->rpos = sk->rbuf + (end - pkt_start);
1797: }
1798: return 0;
1799: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>