Annotation of embedaddon/bird/proto/bgp/packets.c, revision 1.1.1.1
1.1 misho 1: /*
2: * BIRD -- BGP Packet Processing
3: *
4: * (c) 2000 Martin Mares <mj@ucw.cz>
5: *
6: * Can be freely distributed and used under the terms of the GNU GPL.
7: */
8:
9: #undef LOCAL_DEBUG
10:
11: #include "nest/bird.h"
12: #include "nest/iface.h"
13: #include "nest/protocol.h"
14: #include "nest/route.h"
15: #include "nest/attrs.h"
16: #include "nest/mrtdump.h"
17: #include "conf/conf.h"
18: #include "lib/unaligned.h"
19: #include "lib/socket.h"
20:
21: #include "nest/cli.h"
22:
23: #include "bgp.h"
24:
25:
26: #define BGP_RR_REQUEST 0
27: #define BGP_RR_BEGIN 1
28: #define BGP_RR_END 2
29:
30:
31: static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
32: static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
33:
34: /* Table for state -> RFC 6608 FSM error subcodes */
35: static byte fsm_err_subcode[BS_MAX] = {
36: [BS_OPENSENT] = 1,
37: [BS_OPENCONFIRM] = 2,
38: [BS_ESTABLISHED] = 3
39: };
40:
41: /*
42: * MRT Dump format is not semantically specified.
43: * We will use these values in appropriate fields:
44: *
45: * Local AS, Remote AS - configured AS numbers for given BGP instance.
46: * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
47: *
48: * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
49: * changes) and MESSAGE (for received BGP messages).
50: *
51: * STATE_CHANGE uses always AS4 variant, but MESSAGE uses AS4 variant
52: * only when AS4 session is established and even in that case MESSAGE
53: * does not use AS4 variant for initial OPEN message. This strange
54: * behavior is here for compatibility with Quagga and Bgpdump.
55: */
56:
57: static byte *
58: mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
59: {
60: struct bgp_proto *p = conn->bgp;
61:
62: if (as4)
63: {
64: put_u32(buf+0, p->remote_as);
65: put_u32(buf+4, p->local_as);
66: buf+=8;
67: }
68: else
69: {
70: put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
71: put_u16(buf+2, (p->local_as <= 0xFFFF) ? p->local_as : AS_TRANS);
72: buf+=4;
73: }
74:
75: put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
76: put_u16(buf+2, BGP_AF);
77: buf+=4;
78: buf = put_ipa(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
79: buf = put_ipa(buf, conn->sk ? conn->sk->saddr : IPA_NONE);
80:
81: return buf;
82: }
83:
84: static void
85: mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
86: {
87: byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
88: byte *bp = buf + MRTDUMP_HDR_LENGTH;
89: int as4 = conn->bgp->as4_session;
90:
91: bp = mrt_put_bgp4_hdr(bp, conn, as4);
92: memcpy(bp, pkt, len);
93: bp += len;
94: mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
95: buf, bp-buf);
96: }
97:
98: static inline u16
99: convert_state(unsigned state)
100: {
101: /* Convert state from our BS_* values to values used in MRTDump */
102: return (state == BS_CLOSE) ? 1 : state + 1;
103: }
104:
105: void
106: mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new)
107: {
108: byte buf[128];
109: byte *bp = buf + MRTDUMP_HDR_LENGTH;
110:
111: bp = mrt_put_bgp4_hdr(bp, conn, 1);
112: put_u16(bp+0, convert_state(old));
113: put_u16(bp+2, convert_state(new));
114: bp += 4;
115: mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
116: }
117:
118: static byte *
119: bgp_create_notification(struct bgp_conn *conn, byte *buf)
120: {
121: struct bgp_proto *p = conn->bgp;
122:
123: BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
124: buf[0] = conn->notify_code;
125: buf[1] = conn->notify_subcode;
126: memcpy(buf+2, conn->notify_data, conn->notify_size);
127: return buf + 2 + conn->notify_size;
128: }
129:
130: #ifdef IPV6
131: static byte *
132: bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
133: {
134: *buf++ = 1; /* Capability 1: Multiprotocol extensions */
135: *buf++ = 4; /* Capability data length */
136: *buf++ = 0; /* We support AF IPv6 */
137: *buf++ = BGP_AF_IPV6;
138: *buf++ = 0; /* RFU */
139: *buf++ = 1; /* and SAFI 1 */
140: return buf;
141: }
142:
143: #else
144:
145: static byte *
146: bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
147: {
148: *buf++ = 1; /* Capability 1: Multiprotocol extensions */
149: *buf++ = 4; /* Capability data length */
150: *buf++ = 0; /* We support AF IPv4 */
151: *buf++ = BGP_AF_IPV4;
152: *buf++ = 0; /* RFU */
153: *buf++ = 1; /* and SAFI 1 */
154: return buf;
155: }
156: #endif
157:
158: static byte *
159: bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
160: {
161: *buf++ = 2; /* Capability 2: Support for route refresh */
162: *buf++ = 0; /* Capability data length */
163: return buf;
164: }
165:
166: static byte *
167: bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf)
168: {
169: *buf++ = 6; /* Capability 6: Support for extended messages */
170: *buf++ = 0; /* Capability data length */
171: return buf;
172: }
173:
174: static byte *
175: bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
176: {
177: *buf++ = 64; /* Capability 64: Support for graceful restart */
178: *buf++ = 6; /* Capability data length */
179:
180: put_u16(buf, p->cf->gr_time);
181: if (p->p.gr_recovery)
182: buf[0] |= BGP_GRF_RESTART;
183: buf += 2;
184:
185: *buf++ = 0; /* Appropriate AF */
186: *buf++ = BGP_AF;
187: *buf++ = 1; /* and SAFI 1 */
188: *buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0;
189:
190: return buf;
191: }
192:
193: static byte *
194: bgp_put_cap_gr2(struct bgp_proto *p UNUSED, byte *buf)
195: {
196: *buf++ = 64; /* Capability 64: Support for graceful restart */
197: *buf++ = 2; /* Capability data length */
198: put_u16(buf, 0);
199: return buf + 2;
200: }
201:
202: static byte *
203: bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
204: {
205: *buf++ = 65; /* Capability 65: Support for 4-octet AS number */
206: *buf++ = 4; /* Capability data length */
207: put_u32(buf, p->local_as);
208: return buf + 4;
209: }
210:
211: static byte *
212: bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
213: {
214: *buf++ = 69; /* Capability 69: Support for ADD-PATH */
215: *buf++ = 4; /* Capability data length */
216:
217: *buf++ = 0; /* Appropriate AF */
218: *buf++ = BGP_AF;
219: *buf++ = 1; /* SAFI 1 */
220:
221: *buf++ = p->cf->add_path;
222:
223: return buf;
224: }
225:
226: static byte *
227: bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf)
228: {
229: *buf++ = 70; /* Capability 70: Support for enhanced route refresh */
230: *buf++ = 0; /* Capability data length */
231: return buf;
232: }
233:
234:
235: static byte *
236: bgp_create_open(struct bgp_conn *conn, byte *buf)
237: {
238: struct bgp_proto *p = conn->bgp;
239: byte *cap;
240: int cap_len;
241:
242: BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
243: BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
244: buf[0] = BGP_VERSION;
245: put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
246: put_u16(buf+3, p->cf->hold_time);
247: put_u32(buf+5, p->local_id);
248:
249: if (conn->start_state == BSS_CONNECT_NOCAP)
250: {
251: BGP_TRACE(D_PACKETS, "Skipping capabilities");
252: buf[9] = 0;
253: return buf + 10;
254: }
255:
256: /* Skipped 3 B for length field and Capabilities parameter header */
257: cap = buf + 12;
258:
259: #ifndef IPV6
260: if (p->cf->advertise_ipv4)
261: cap = bgp_put_cap_ipv4(p, cap);
262: #endif
263:
264: #ifdef IPV6
265: cap = bgp_put_cap_ipv6(p, cap);
266: #endif
267:
268: if (p->cf->enable_refresh)
269: cap = bgp_put_cap_rr(p, cap);
270:
271: if (p->cf->gr_mode == BGP_GR_ABLE)
272: cap = bgp_put_cap_gr1(p, cap);
273: else if (p->cf->gr_mode == BGP_GR_AWARE)
274: cap = bgp_put_cap_gr2(p, cap);
275:
276: if (p->cf->enable_as4)
277: cap = bgp_put_cap_as4(p, cap);
278:
279: if (p->cf->add_path)
280: cap = bgp_put_cap_add_path(p, cap);
281:
282: if (p->cf->enable_refresh)
283: cap = bgp_put_cap_err(p, cap);
284:
285: if (p->cf->enable_extended_messages)
286: cap = bgp_put_cap_ext_msg(p, cap);
287:
288: cap_len = cap - buf - 12;
289: if (cap_len > 0)
290: {
291: buf[9] = cap_len + 2; /* Optional params len */
292: buf[10] = 2; /* Option: Capability list */
293: buf[11] = cap_len; /* Option length */
294: return cap;
295: }
296: else
297: {
298: buf[9] = 0; /* No optional parameters */
299: return buf + 10;
300: }
301: }
302:
303: static uint
304: bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, uint remains)
305: {
306: byte *start = w;
307: ip_addr a;
308: int bytes;
309:
310: while (!EMPTY_LIST(buck->prefixes) && (remains >= (5+sizeof(ip_addr))))
311: {
312: struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
313: DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
314:
315: if (p->add_path_tx)
316: {
317: put_u32(w, px->path_id);
318: w += 4;
319: remains -= 4;
320: }
321:
322: *w++ = px->n.pxlen;
323: bytes = (px->n.pxlen + 7) / 8;
324: a = px->n.prefix;
325: ipa_hton(a);
326: memcpy(w, &a, bytes);
327: w += bytes;
328: remains -= bytes + 1;
329: rem_node(&px->bucket_node);
330: bgp_free_prefix(p, px);
331: // fib_delete(&p->prefix_fib, px);
332: }
333: return w - start;
334: }
335:
336: static void
337: bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
338: {
339: while (!EMPTY_LIST(buck->prefixes))
340: {
341: struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
342: log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
343: rem_node(&px->bucket_node);
344: bgp_free_prefix(p, px);
345: // fib_delete(&p->prefix_fib, px);
346: }
347: }
348:
349: #ifndef IPV6 /* IPv4 version */
350:
351: static byte *
352: bgp_create_update(struct bgp_conn *conn, byte *buf)
353: {
354: struct bgp_proto *p = conn->bgp;
355: struct bgp_bucket *buck;
356: int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
357: byte *w;
358: int wd_size = 0;
359: int r_size = 0;
360: int a_size = 0;
361:
362: w = buf+2;
363: if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
364: {
365: DBG("Withdrawn routes:\n");
366: wd_size = bgp_encode_prefixes(p, w, buck, remains);
367: w += wd_size;
368: remains -= wd_size;
369: }
370: put_u16(buf, wd_size);
371:
372: if (!wd_size)
373: {
374: while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
375: {
376: if (EMPTY_LIST(buck->prefixes))
377: {
378: DBG("Deleting empty bucket %p\n", buck);
379: rem_node(&buck->send_node);
380: bgp_free_bucket(p, buck);
381: continue;
382: }
383:
384: DBG("Processing bucket %p\n", buck);
385: a_size = bgp_encode_attrs(p, w+2, buck->eattrs, remains - 1024);
386:
387: if (a_size < 0)
388: {
389: log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
390: bgp_flush_prefixes(p, buck);
391: rem_node(&buck->send_node);
392: bgp_free_bucket(p, buck);
393: continue;
394: }
395:
396: put_u16(w, a_size);
397: w += a_size + 2;
398: r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
399: w += r_size;
400: break;
401: }
402: }
403: if (!a_size) /* Attributes not already encoded */
404: {
405: put_u16(w, 0);
406: w += 2;
407: }
408: if (wd_size || r_size)
409: {
410: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
411: return w;
412: }
413: else
414: return NULL;
415: }
416:
417: static byte *
418: bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
419: {
420: struct bgp_proto *p = conn->bgp;
421: BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
422:
423: put_u32(buf, 0);
424: return buf+4;
425: }
426:
427: #else /* IPv6 version */
428:
429: static inline int
430: same_iface(struct bgp_proto *p, ip_addr *ip)
431: {
432: neighbor *n = neigh_find(&p->p, ip, 0);
433: return n && p->neigh && n->iface == p->neigh->iface;
434: }
435:
436: static byte *
437: bgp_create_update(struct bgp_conn *conn, byte *buf)
438: {
439: struct bgp_proto *p = conn->bgp;
440: struct bgp_bucket *buck;
441: int size, second, rem_stored;
442: int remains = bgp_max_packet_length(p) - BGP_HEADER_LENGTH - 4;
443: byte *w, *w_stored, *tmp, *tstart;
444: ip_addr *ipp, ip, ip_ll;
445: ea_list *ea;
446: eattr *nh;
447:
448: put_u16(buf, 0);
449: w = buf+4;
450:
451: if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
452: {
453: DBG("Withdrawn routes:\n");
454: tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
455: *tmp++ = 0;
456: *tmp++ = BGP_AF_IPV6;
457: *tmp++ = 1;
458: ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
459: size = bgp_encode_attrs(p, w, ea, remains);
460: ASSERT(size >= 0);
461: w += size;
462: remains -= size;
463: }
464: else
465: {
466: while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
467: {
468: if (EMPTY_LIST(buck->prefixes))
469: {
470: DBG("Deleting empty bucket %p\n", buck);
471: rem_node(&buck->send_node);
472: bgp_free_bucket(p, buck);
473: continue;
474: }
475:
476: DBG("Processing bucket %p\n", buck);
477: rem_stored = remains;
478: w_stored = w;
479:
480: size = bgp_encode_attrs(p, w, buck->eattrs, remains - 1024);
481: if (size < 0)
482: {
483: log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
484: bgp_flush_prefixes(p, buck);
485: rem_node(&buck->send_node);
486: bgp_free_bucket(p, buck);
487: continue;
488: }
489: w += size;
490: remains -= size;
491:
492: /* We have two addresses here in NEXT_HOP eattr. Really.
493: Unless NEXT_HOP was modified by filter */
494: nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
495: ASSERT(nh);
496: second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
497: ipp = (ip_addr *) nh->u.ptr->data;
498: ip = ipp[0];
499: ip_ll = IPA_NONE;
500:
501: if (ipa_equal(ip, p->source_addr))
502: ip_ll = p->local_link;
503: else
504: {
505: /* If we send a route with 'third party' next hop destinated
506: * in the same interface, we should also send a link local
507: * next hop address. We use the received one (stored in the
508: * other part of BA_NEXT_HOP eattr). If we didn't received
509: * it (for example it is a static route), we can't use
510: * 'third party' next hop and we have to use local IP address
511: * as next hop. Sending original next hop address without
512: * link local address seems to be a natural way to solve that
513: * problem, but it is contrary to RFC 2545 and Quagga does not
514: * accept such routes.
515: *
516: * There are two cases, either we have global IP, or
517: * IPA_NONE if the neighbor is link-local. For IPA_NONE,
518: * we suppose it is on the same iface, see bgp_update_attrs().
519: */
520:
521: if (ipa_zero(ip) || same_iface(p, &ip))
522: {
523: if (second && ipa_nonzero(ipp[1]))
524: ip_ll = ipp[1];
525: else
526: {
527: switch (p->cf->missing_lladdr)
528: {
529: case MLL_SELF:
530: ip = p->source_addr;
531: ip_ll = p->local_link;
532: break;
533: case MLL_DROP:
534: log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
535: w = w_stored;
536: remains = rem_stored;
537: bgp_flush_prefixes(p, buck);
538: rem_node(&buck->send_node);
539: bgp_free_bucket(p, buck);
540: continue;
541: case MLL_IGNORE:
542: break;
543: }
544: }
545: }
546: }
547:
548: tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
549: *tmp++ = 0;
550: *tmp++ = BGP_AF_IPV6;
551: *tmp++ = 1;
552:
553: if (ipa_is_link_local(ip))
554: ip = IPA_NONE;
555:
556: if (ipa_nonzero(ip_ll))
557: {
558: *tmp++ = 32;
559: ipa_hton(ip);
560: memcpy(tmp, &ip, 16);
561: ipa_hton(ip_ll);
562: memcpy(tmp+16, &ip_ll, 16);
563: tmp += 32;
564: }
565: else
566: {
567: *tmp++ = 16;
568: ipa_hton(ip);
569: memcpy(tmp, &ip, 16);
570: tmp += 16;
571: }
572:
573: *tmp++ = 0; /* No SNPA information */
574: tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
575: ea->attrs[0].u.ptr->length = tmp - tstart;
576: size = bgp_encode_attrs(p, w, ea, remains);
577: ASSERT(size >= 0);
578: w += size;
579: break;
580: }
581: }
582:
583: size = w - (buf+4);
584: put_u16(buf+2, size);
585: lp_flush(bgp_linpool);
586: if (size)
587: {
588: BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
589: return w;
590: }
591: else
592: return NULL;
593: }
594:
595: static byte *
596: bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
597: {
598: struct bgp_proto *p = conn->bgp;
599: BGP_TRACE(D_PACKETS, "Sending END-OF-RIB");
600:
601: put_u16(buf+0, 0);
602: put_u16(buf+2, 6); /* length 4-9 */
603: buf += 4;
604:
605: /* Empty MP_UNREACH_NLRI atribute */
606: *buf++ = BAF_OPTIONAL;
607: *buf++ = BA_MP_UNREACH_NLRI;
608: *buf++ = 3; /* Length 7-9 */
609: *buf++ = 0; /* AFI */
610: *buf++ = BGP_AF_IPV6;
611: *buf++ = 1; /* SAFI */
612: return buf;
613: }
614:
615: #endif
616:
617: static inline byte *
618: bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
619: {
620: struct bgp_proto *p = conn->bgp;
621: BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");
622:
623: /* Original original route refresh request, RFC 2918 */
624: *buf++ = 0;
625: *buf++ = BGP_AF;
626: *buf++ = BGP_RR_REQUEST;
627: *buf++ = 1; /* SAFI */
628: return buf;
629: }
630:
631: static inline byte *
632: bgp_create_begin_refresh(struct bgp_conn *conn, byte *buf)
633: {
634: struct bgp_proto *p = conn->bgp;
635: BGP_TRACE(D_PACKETS, "Sending BEGIN-OF-RR");
636:
637: /* Demarcation of beginning of route refresh (BoRR), RFC 7313 */
638: *buf++ = 0;
639: *buf++ = BGP_AF;
640: *buf++ = BGP_RR_BEGIN;
641: *buf++ = 1; /* SAFI */
642: return buf;
643: }
644:
645: static inline byte *
646: bgp_create_end_refresh(struct bgp_conn *conn, byte *buf)
647: {
648: struct bgp_proto *p = conn->bgp;
649: BGP_TRACE(D_PACKETS, "Sending END-OF-RR");
650:
651: /* Demarcation of ending of route refresh (EoRR), RFC 7313 */
652: *buf++ = 0;
653: *buf++ = BGP_AF;
654: *buf++ = BGP_RR_END;
655: *buf++ = 1; /* SAFI */
656: return buf;
657: }
658:
659:
660: static void
661: bgp_create_header(byte *buf, uint len, uint type)
662: {
663: memset(buf, 0xff, 16); /* Marker */
664: put_u16(buf+16, len);
665: buf[18] = type;
666: }
667:
668: /**
669: * bgp_fire_tx - transmit packets
670: * @conn: connection
671: *
672: * Whenever the transmit buffers of the underlying TCP connection
673: * are free and we have any packets queued for sending, the socket functions
674: * call bgp_fire_tx() which takes care of selecting the highest priority packet
675: * queued (Notification > Keepalive > Open > Update), assembling its header
676: * and body and sending it to the connection.
677: */
678: static int
679: bgp_fire_tx(struct bgp_conn *conn)
680: {
681: struct bgp_proto *p = conn->bgp;
682: uint s = conn->packets_to_send;
683: sock *sk = conn->sk;
684: byte *buf, *pkt, *end;
685: int type;
686:
687: if (!sk)
688: {
689: conn->packets_to_send = 0;
690: return 0;
691: }
692: buf = sk->tbuf;
693: pkt = buf + BGP_HEADER_LENGTH;
694:
695: if (s & (1 << PKT_SCHEDULE_CLOSE))
696: {
697: /* We can finally close connection and enter idle state */
698: bgp_conn_enter_idle_state(conn);
699: return 0;
700: }
701: if (s & (1 << PKT_NOTIFICATION))
702: {
703: s = 1 << PKT_SCHEDULE_CLOSE;
704: type = PKT_NOTIFICATION;
705: end = bgp_create_notification(conn, pkt);
706: }
707: else if (s & (1 << PKT_KEEPALIVE))
708: {
709: s &= ~(1 << PKT_KEEPALIVE);
710: type = PKT_KEEPALIVE;
711: end = pkt; /* Keepalives carry no data */
712: BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
713: bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
714: }
715: else if (s & (1 << PKT_OPEN))
716: {
717: s &= ~(1 << PKT_OPEN);
718: type = PKT_OPEN;
719: end = bgp_create_open(conn, pkt);
720: }
721: else if (s & (1 << PKT_ROUTE_REFRESH))
722: {
723: s &= ~(1 << PKT_ROUTE_REFRESH);
724: type = PKT_ROUTE_REFRESH;
725: end = bgp_create_route_refresh(conn, pkt);
726: }
727: else if (s & (1 << PKT_BEGIN_REFRESH))
728: {
729: s &= ~(1 << PKT_BEGIN_REFRESH);
730: type = PKT_ROUTE_REFRESH; /* BoRR is a subtype of RR */
731: end = bgp_create_begin_refresh(conn, pkt);
732: }
733: else if (s & (1 << PKT_UPDATE))
734: {
735: type = PKT_UPDATE;
736: end = bgp_create_update(conn, pkt);
737:
738: if (!end)
739: {
740: /* No update to send, perhaps we need to send End-of-RIB or EoRR */
741:
742: conn->packets_to_send = 0;
743:
744: if (p->feed_state == BFS_LOADED)
745: {
746: type = PKT_UPDATE;
747: end = bgp_create_end_mark(conn, pkt);
748: }
749:
750: else if (p->feed_state == BFS_REFRESHED)
751: {
752: type = PKT_ROUTE_REFRESH;
753: end = bgp_create_end_refresh(conn, pkt);
754: }
755:
756: else /* Really nothing to send */
757: return 0;
758:
759: p->feed_state = BFS_NONE;
760: }
761: }
762: else
763: return 0;
764:
765: conn->packets_to_send = s;
766: bgp_create_header(buf, end - buf, type);
767: return sk_send(sk, end - buf);
768: }
769:
770: /**
771: * bgp_schedule_packet - schedule a packet for transmission
772: * @conn: connection
773: * @type: packet type
774: *
775: * Schedule a packet of type @type to be sent as soon as possible.
776: */
777: void
778: bgp_schedule_packet(struct bgp_conn *conn, int type)
779: {
780: DBG("BGP: Scheduling packet type %d\n", type);
781: conn->packets_to_send |= 1 << type;
782: if (conn->sk && conn->sk->tpos == conn->sk->tbuf && !ev_active(conn->tx_ev))
783: ev_schedule(conn->tx_ev);
784: }
785:
/*
 * Hook taking the connection as an opaque pointer: keep calling
 * bgp_fire_tx() for as long as it returns a positive value.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");

  for (;;)
    if (bgp_fire_tx(conn) <= 0)
      break;
}
795:
796: void
797: bgp_tx(sock *sk)
798: {
799: struct bgp_conn *conn = sk->data;
800:
801: DBG("BGP: TX hook\n");
802: while (bgp_fire_tx(conn) > 0)
803: ;
804: }
805:
806: /* Capability negotiation as per RFC 2842 */
807:
808: void
809: bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
810: {
811: // struct bgp_proto *p = conn->bgp;
812: int i, cl;
813:
814: while (len > 0)
815: {
816: if (len < 2 || len < 2 + opt[1])
817: goto err;
818:
819: cl = opt[1];
820:
821: switch (opt[0])
822: {
823: case 2: /* Route refresh capability, RFC 2918 */
824: if (cl != 0)
825: goto err;
826: conn->peer_refresh_support = 1;
827: break;
828:
829: case 6: /* Extended message length capability, draft */
830: if (cl != 0)
831: goto err;
832: conn->peer_ext_messages_support = 1;
833: break;
834:
835: case 64: /* Graceful restart capability, RFC 4724 */
836: if (cl % 4 != 2)
837: goto err;
838: conn->peer_gr_aware = 1;
839: conn->peer_gr_able = 0;
840: conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
841: conn->peer_gr_flags = opt[2] & 0xf0;
842: conn->peer_gr_aflags = 0;
843: for (i = 2; i < cl; i += 4)
844: if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
845: {
846: conn->peer_gr_able = 1;
847: conn->peer_gr_aflags = opt[2+i+3];
848: }
849: break;
850:
851: case 65: /* AS4 capability, RFC 4893 */
852: if (cl != 4)
853: goto err;
854: conn->peer_as4_support = 1;
855: if (conn->bgp->cf->enable_as4)
856: conn->advertised_as = get_u32(opt + 2);
857: break;
858:
859: case 69: /* ADD-PATH capability, draft */
860: if (cl % 4)
861: goto err;
862: for (i = 0; i < cl; i += 4)
863: if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
864: conn->peer_add_path = opt[2+i+3];
865: if (conn->peer_add_path > ADD_PATH_FULL)
866: goto err;
867: break;
868:
869: case 70: /* Enhanced route refresh capability, RFC 7313 */
870: if (cl != 0)
871: goto err;
872: conn->peer_enhanced_refresh_support = 1;
873: break;
874:
875: /* We can safely ignore all other capabilities */
876: }
877: len -= 2 + cl;
878: opt += 2 + cl;
879: }
880: return;
881:
882: err:
883: bgp_error(conn, 2, 0, NULL, 0);
884: return;
885: }
886:
887: static int
888: bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
889: {
890: struct bgp_proto *p = conn->bgp;
891: int ol;
892:
893: while (len > 0)
894: {
895: if (len < 2 || len < 2 + opt[1])
896: { bgp_error(conn, 2, 0, NULL, 0); return 0; }
897: #ifdef LOCAL_DEBUG
898: {
899: int i;
900: DBG("\tOption %02x:", opt[0]);
901: for(i=0; i<opt[1]; i++)
902: DBG(" %02x", opt[2+i]);
903: DBG("\n");
904: }
905: #endif
906:
907: ol = opt[1];
908: switch (opt[0])
909: {
910: case 2:
911: if (conn->start_state == BSS_CONNECT_NOCAP)
912: BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
913: else
914: bgp_parse_capabilities(conn, opt + 2, ol);
915: break;
916:
917: default:
918: /*
919: * BGP specs don't tell us to send which option
920: * we didn't recognize, but it's common practice
921: * to do so. Also, capability negotiation with
922: * Cisco routers doesn't work without that.
923: */
924: bgp_error(conn, 2, 4, opt, ol);
925: return 0;
926: }
927: len -= 2 + ol;
928: opt += 2 + ol;
929: }
930: return 0;
931: }
932:
933: static void
934: bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
935: {
936: struct bgp_conn *other;
937: struct bgp_proto *p = conn->bgp;
938: unsigned hold;
939: u16 base_as;
940: u32 id;
941:
942: /* Check state */
943: if (conn->state != BS_OPENSENT)
944: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
945:
946: /* Check message contents */
947: if (len < 29 || len != 29U + pkt[28])
948: { bgp_error(conn, 1, 2, pkt+16, 2); return; }
949: if (pkt[19] != BGP_VERSION)
950: { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
951: conn->advertised_as = base_as = get_u16(pkt+20);
952: hold = get_u16(pkt+22);
953: id = get_u32(pkt+24);
954: BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);
955:
956: if (bgp_parse_options(conn, pkt+29, pkt[28]))
957: return;
958:
959: if (hold > 0 && hold < 3)
960: { bgp_error(conn, 2, 6, pkt+22, 2); return; }
961:
962: /* RFC 6286 2.2 - router ID is nonzero and AS-wide unique */
963: if (!id || (p->is_internal && id == p->local_id))
964: { bgp_error(conn, 2, 3, pkt+24, -4); return; }
965:
966: if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
967: log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);
968:
969: if (conn->advertised_as != p->remote_as)
970: {
971: if (conn->peer_as4_support)
972: {
973: u32 val = htonl(conn->advertised_as);
974: bgp_error(conn, 2, 2, (byte *) &val, 4);
975: }
976: else
977: bgp_error(conn, 2, 2, pkt+20, 2);
978:
979: return;
980: }
981:
982: /* Check the other connection */
983: other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
984: switch (other->state)
985: {
986: case BS_CONNECT:
987: case BS_ACTIVE:
988: /* Stop outgoing connection attempts */
989: bgp_conn_enter_idle_state(other);
990: break;
991:
992: case BS_IDLE:
993: case BS_OPENSENT:
994: case BS_CLOSE:
995: break;
996:
997: case BS_OPENCONFIRM:
998: /*
999: * Description of collision detection rules in RFC 4271 is confusing and
1000: * contradictory, but it is essentially:
1001: *
1002: * 1. Router with higher ID is dominant
1003: * 2. If both have the same ID, router with higher ASN is dominant [RFC6286]
1004: * 3. When both connections are in OpenConfirm state, one initiated by
1005: * the dominant router is kept.
1006: *
1007: * The first line in the expression below evaluates whether the neighbor
1008: * is dominant, the second line whether the new connection was initiated
1009: * by the neighbor. If both are true (or both are false), we keep the new
1010: * connection, otherwise we keep the old one.
1011: */
1012: if (((p->local_id < id) || ((p->local_id == id) && (p->local_as < p->remote_as)))
1013: == (conn == &p->incoming_conn))
1014: {
1015: /* Should close the other connection */
1016: BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
1017: bgp_error(other, 6, 7, NULL, 0);
1018: break;
1019: }
1020: /* Fall thru */
1021: case BS_ESTABLISHED:
1022: /* Should close this connection */
1023: BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
1024: bgp_error(conn, 6, 7, NULL, 0);
1025: return;
1026: default:
1027: bug("bgp_rx_open: Unknown state");
1028: }
1029:
1030: /* Update our local variables */
1031: conn->hold_time = MIN(hold, p->cf->hold_time);
1032: conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
1033: p->remote_id = id;
1034: p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
1035: p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
1036: p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
1037: p->gr_ready = p->cf->gr_mode && conn->peer_gr_able;
1038: p->ext_messages = p->cf->enable_extended_messages && conn->peer_ext_messages_support;
1039:
1040: if (p->add_path_tx)
1041: p->p.accept_ra_types = RA_ANY;
1042:
1043: DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);
1044:
1045: bgp_schedule_packet(conn, PKT_KEEPALIVE);
1046: bgp_start_timer(conn->hold_timer, conn->hold_time);
1047: bgp_conn_enter_openconfirm_state(conn);
1048: }
1049:
1050:
1051: static inline void
1052: bgp_rx_end_mark(struct bgp_proto *p)
1053: {
1054: BGP_TRACE(D_PACKETS, "Got END-OF-RIB");
1055:
1056: if (p->load_state == BFS_LOADING)
1057: p->load_state = BFS_NONE;
1058:
1059: if (p->p.gr_recovery)
1060: proto_graceful_restart_unlock(&p->p);
1061:
1062: if (p->gr_active)
1063: bgp_graceful_restart_done(p);
1064: }
1065:
1066:
/*
 * DECODE_PREFIX(pp, ll) - decode one NLRI entry.
 *
 * Reads one prefix from the position pp with ll bytes left and advances both.
 * With add_path_rx set, a 4-byte path ID precedes the prefix.
 *
 * The macro expects these names in the enclosing scope: p (protocol),
 * path_id, prefix, pxlen (results), err (error subcode) and a label 'done'
 * to which it jumps with err set to 1 (data too short) or 10 (prefix length
 * above BITS_PER_IP_ADDRESS).
 */
#define DECODE_PREFIX(pp, ll) do {				\
  if (p->add_path_rx)						\
  {								\
    if ((ll) < 5) { err=1; goto done; }				\
    path_id = get_u32(pp);					\
    (pp) += 4;							\
    (ll) -= 4;							\
  }								\
  int b = *(pp)++;						\
  (ll)--;							\
  if (b > BITS_PER_IP_ADDRESS) { err=10; goto done; }		\
  int q = (b+7) / 8;						\
  if ((ll) < q) { err=1; goto done; }				\
  memcpy(&prefix, (pp), q);					\
  (pp) += q;							\
  (ll) -= q;							\
  ipa_ntoh(prefix);						\
  prefix = ipa_and(prefix, ipa_mkmask(b));			\
  pxlen = b;							\
} while (0)
1088:
1089:
1090: static inline void
1091: bgp_rte_update(struct bgp_proto *p, ip_addr prefix, int pxlen,
1092: u32 path_id, u32 *last_id, struct rte_src **src,
1093: rta *a0, rta **a)
1094: {
1095: if (path_id != *last_id)
1096: {
1097: *src = rt_get_source(&p->p, path_id);
1098: *last_id = path_id;
1099:
1100: if (*a)
1101: {
1102: rta_free(*a);
1103: *a = NULL;
1104: }
1105: }
1106:
1107: /* Prepare cached route attributes */
1108: if (!*a)
1109: {
1110: a0->src = *src;
1111:
1112: /* Workaround for rta_lookup() breaking eattrs */
1113: ea_list *ea = a0->eattrs;
1114: *a = rta_lookup(a0);
1115: a0->eattrs = ea;
1116: }
1117:
1118: net *n = net_get(p->p.table, prefix, pxlen);
1119: rte *e = rte_get_temp(rta_clone(*a));
1120: e->net = n;
1121: e->pflags = 0;
1122: e->u.bgp.suppressed = 0;
1123: rte_update2(p->p.main_ahook, n, e, *src);
1124: }
1125:
1126: static inline void
1127: bgp_rte_withdraw(struct bgp_proto *p, ip_addr prefix, int pxlen,
1128: u32 path_id, u32 *last_id, struct rte_src **src)
1129: {
1130: if (path_id != *last_id)
1131: {
1132: *src = rt_find_source(&p->p, path_id);
1133: *last_id = path_id;
1134: }
1135:
1136: net *n = net_find(p->p.table, prefix, pxlen);
1137: rte_update2( p->p.main_ahook, n, NULL, *src);
1138: }
1139:
1140: static inline int
1141: bgp_set_next_hop(struct bgp_proto *p, rta *a)
1142: {
1143: struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
1144: ip_addr *nexthop = (ip_addr *) nh->u.ptr->data;
1145:
1146: #ifdef IPV6
1147: int second = (nh->u.ptr->length == NEXT_HOP_LENGTH) && ipa_nonzero(nexthop[1]);
1148:
1149: /* First address should not be link-local, but may be zero in direct mode */
1150: if (ipa_is_link_local(*nexthop))
1151: *nexthop = IPA_NONE;
1152: #else
1153: int second = 0;
1154: #endif
1155:
1156: if (p->cf->gw_mode == GW_DIRECT)
1157: {
1158: neighbor *ng = NULL;
1159:
1160: if (ipa_nonzero(*nexthop))
1161: ng = neigh_find(&p->p, nexthop, 0);
1162: else if (second) /* GW_DIRECT -> single_hop -> p->neigh != NULL */
1163: ng = neigh_find2(&p->p, nexthop + 1, p->neigh->iface, 0);
1164:
1165: /* Fallback */
1166: if (!ng)
1167: ng = p->neigh;
1168:
1169: if (ng->scope == SCOPE_HOST)
1170: return 0;
1171:
1172: a->dest = RTD_ROUTER;
1173: a->gw = ng->addr;
1174: a->iface = ng->iface;
1175: a->hostentry = NULL;
1176: a->igp_metric = 0;
1177: }
1178: else /* GW_RECURSIVE */
1179: {
1180: if (ipa_zero(*nexthop))
1181: return 0;
1182:
1183: rta_set_recursive_next_hop(p->p.table, a, p->igp_table, nexthop, nexthop + second);
1184: }
1185:
1186: return 1;
1187: }
1188:
1189: #ifndef IPV6 /* IPv4 version */
1190:
1191: static void
1192: bgp_do_rx_update(struct bgp_conn *conn,
1193: byte *withdrawn, int withdrawn_len,
1194: byte *nlri, int nlri_len,
1195: byte *attrs, int attr_len)
1196: {
1197: struct bgp_proto *p = conn->bgp;
1198: struct rte_src *src = p->p.main_source;
1199: rta *a0, *a = NULL;
1200: ip_addr prefix;
1201: int pxlen, err = 0;
1202: u32 path_id = 0;
1203: u32 last_id = 0;
1204:
1205: /* Check for End-of-RIB marker */
1206: if (!withdrawn_len && !attr_len && !nlri_len)
1207: {
1208: bgp_rx_end_mark(p);
1209: return;
1210: }
1211:
1212: /* Withdraw routes */
1213: while (withdrawn_len)
1214: {
1215: DECODE_PREFIX(withdrawn, withdrawn_len);
1216: DBG("Withdraw %I/%d\n", prefix, pxlen);
1217:
1218: bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1219: }
1220:
1221: if (!attr_len && !nlri_len) /* shortcut */
1222: return;
1223:
1224: a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
1225:
1226: if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1227: return;
1228:
1229: if (a0 && nlri_len && !bgp_set_next_hop(p, a0))
1230: a0 = NULL;
1231:
1232: last_id = 0;
1233: src = p->p.main_source;
1234:
1235: while (nlri_len)
1236: {
1237: DECODE_PREFIX(nlri, nlri_len);
1238: DBG("Add %I/%d\n", prefix, pxlen);
1239:
1240: if (a0)
1241: bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1242: else /* Forced withdraw as a result of soft error */
1243: bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1244: }
1245:
1246: done:
1247: if (a)
1248: rta_free(a);
1249:
1250: if (err)
1251: bgp_error(conn, 3, err, NULL, 0);
1252:
1253: return;
1254: }
1255:
1256: #else /* IPv6 version */
1257:
/*
 * DO_NLRI(name) - statement prefix for walking p->name##_start / p->name##_len.
 *
 * Loads x, len and len0 from the named fields. When the section is non-empty
 * it reads the 16-bit address family into af and advances x and len by three
 * bytes; a section shorter than three bytes sets err to 9 and jumps to the
 * done label. An empty section yields af == 0. The macro ends with an open
 * "if (af == BGP_AF_IPV6)", so the statement written right after the macro
 * invocation runs only for that address family.
 *
 * Expects x, len, len0, af, err, p and a done label in the enclosing scope.
 */
#define DO_NLRI(name)						\
  x = p->name##_start;						\
  len0 = p->name##_len;						\
  len = len0;							\
  af = 0;							\
  if (len)							\
    {								\
      if (len < 3) { err=9; goto done; }			\
      af = get_u16(x);						\
      x += 3;							\
      len -= 3;							\
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, x[-1], len);	\
    }								\
  if (af == BGP_AF_IPV6)
1272:
1273: static void
1274: bgp_attach_next_hop(rta *a0, byte *x)
1275: {
1276: ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
1277: memcpy(nh, x+1, 16);
1278: ipa_ntoh(nh[0]);
1279:
1280: /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
1281: if (*x == 32)
1282: {
1283: memcpy(nh+1, x+17, 16);
1284: ipa_ntoh(nh[1]);
1285: }
1286: else
1287: nh[1] = IPA_NONE;
1288: }
1289:
1290:
1291: static void
1292: bgp_do_rx_update(struct bgp_conn *conn,
1293: byte *withdrawn UNUSED, int withdrawn_len,
1294: byte *nlri UNUSED, int nlri_len,
1295: byte *attrs, int attr_len)
1296: {
1297: struct bgp_proto *p = conn->bgp;
1298: struct rte_src *src = p->p.main_source;
1299: byte *x;
1300: int len, len0;
1301: unsigned af;
1302: rta *a0, *a = NULL;
1303: ip_addr prefix;
1304: int pxlen, err = 0;
1305: u32 path_id = 0;
1306: u32 last_id = 0;
1307:
1308: p->mp_reach_len = 0;
1309: p->mp_unreach_len = 0;
1310: a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
1311:
1312: if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
1313: return;
1314:
1315: /* Check for End-of-RIB marker */
1316: if ((attr_len < 8) && !withdrawn_len && !nlri_len && !p->mp_reach_len &&
1317: (p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
1318: {
1319: bgp_rx_end_mark(p);
1320: return;
1321: }
1322:
1323: DO_NLRI(mp_unreach)
1324: {
1325: while (len)
1326: {
1327: DECODE_PREFIX(x, len);
1328: DBG("Withdraw %I/%d\n", prefix, pxlen);
1329: bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1330: }
1331: }
1332:
1333: DO_NLRI(mp_reach)
1334: {
1335: /* Create fake NEXT_HOP attribute */
1336: if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
1337: { err = 9; goto done; }
1338:
1339: if (a0)
1340: bgp_attach_next_hop(a0, x);
1341:
1342: /* Also ignore one reserved byte */
1343: len -= *x + 2;
1344: x += *x + 2;
1345:
1346: if (a0 && ! bgp_set_next_hop(p, a0))
1347: a0 = NULL;
1348:
1349: last_id = 0;
1350: src = p->p.main_source;
1351:
1352: while (len)
1353: {
1354: DECODE_PREFIX(x, len);
1355: DBG("Add %I/%d\n", prefix, pxlen);
1356:
1357: if (a0)
1358: bgp_rte_update(p, prefix, pxlen, path_id, &last_id, &src, a0, &a);
1359: else /* Forced withdraw as a result of soft error */
1360: bgp_rte_withdraw(p, prefix, pxlen, path_id, &last_id, &src);
1361: }
1362: }
1363:
1364: done:
1365: if (a)
1366: rta_free(a);
1367:
1368: if (err) /* Use subcode 9, not err */
1369: bgp_error(conn, 3, 9, NULL, 0);
1370:
1371: return;
1372: }
1373:
1374: #endif
1375:
1376: static void
1377: bgp_rx_update(struct bgp_conn *conn, byte *pkt, uint len)
1378: {
1379: struct bgp_proto *p = conn->bgp;
1380: byte *withdrawn, *attrs, *nlri;
1381: uint withdrawn_len, attr_len, nlri_len;
1382:
1383: BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");
1384:
1385: /* Workaround for some BGP implementations that skip initial KEEPALIVE */
1386: if (conn->state == BS_OPENCONFIRM)
1387: bgp_conn_enter_established_state(conn);
1388:
1389: if (conn->state != BS_ESTABLISHED)
1390: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1391: bgp_start_timer(conn->hold_timer, conn->hold_time);
1392:
1393: /* Find parts of the packet and check sizes */
1394: if (len < 23)
1395: {
1396: bgp_error(conn, 1, 2, pkt+16, 2);
1397: return;
1398: }
1399: withdrawn = pkt + 21;
1400: withdrawn_len = get_u16(pkt + 19);
1401: if (withdrawn_len + 23 > len)
1402: goto malformed;
1403: attrs = withdrawn + withdrawn_len + 2;
1404: attr_len = get_u16(attrs - 2);
1405: if (withdrawn_len + attr_len + 23 > len)
1406: goto malformed;
1407: nlri = attrs + attr_len;
1408: nlri_len = len - withdrawn_len - attr_len - 23;
1409: if (!attr_len && nlri_len)
1410: goto malformed;
1411: DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);
1412:
1413: lp_flush(bgp_linpool);
1414:
1415: bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
1416: return;
1417:
1418: malformed:
1419: bgp_error(conn, 3, 1, NULL, 0);
1420: }
1421:
1422: static struct {
1423: byte major, minor;
1424: byte *msg;
1425: } bgp_msg_table[] = {
1426: { 1, 0, "Invalid message header" },
1427: { 1, 1, "Connection not synchronized" },
1428: { 1, 2, "Bad message length" },
1429: { 1, 3, "Bad message type" },
1430: { 2, 0, "Invalid OPEN message" },
1431: { 2, 1, "Unsupported version number" },
1432: { 2, 2, "Bad peer AS" },
1433: { 2, 3, "Bad BGP identifier" },
1434: { 2, 4, "Unsupported optional parameter" },
1435: { 2, 5, "Authentication failure" },
1436: { 2, 6, "Unacceptable hold time" },
1437: { 2, 7, "Required capability missing" }, /* [RFC5492] */
1438: { 2, 8, "No supported AFI/SAFI" }, /* This error msg is nonstandard */
1439: { 3, 0, "Invalid UPDATE message" },
1440: { 3, 1, "Malformed attribute list" },
1441: { 3, 2, "Unrecognized well-known attribute" },
1442: { 3, 3, "Missing mandatory attribute" },
1443: { 3, 4, "Invalid attribute flags" },
1444: { 3, 5, "Invalid attribute length" },
1445: { 3, 6, "Invalid ORIGIN attribute" },
1446: { 3, 7, "AS routing loop" }, /* Deprecated */
1447: { 3, 8, "Invalid NEXT_HOP attribute" },
1448: { 3, 9, "Optional attribute error" },
1449: { 3, 10, "Invalid network field" },
1450: { 3, 11, "Malformed AS_PATH" },
1451: { 4, 0, "Hold timer expired" },
1452: { 5, 0, "Finite state machine error" }, /* Subcodes are according to [RFC6608] */
1453: { 5, 1, "Unexpected message in OpenSent state" },
1454: { 5, 2, "Unexpected message in OpenConfirm state" },
1455: { 5, 3, "Unexpected message in Established state" },
1456: { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
1457: { 6, 1, "Maximum number of prefixes reached" },
1458: { 6, 2, "Administrative shutdown" },
1459: { 6, 3, "Peer de-configured" },
1460: { 6, 4, "Administrative reset" },
1461: { 6, 5, "Connection rejected" },
1462: { 6, 6, "Other configuration change" },
1463: { 6, 7, "Connection collision resolution" },
1464: { 6, 8, "Out of Resources" },
1465: { 7, 0, "Invalid ROUTE-REFRESH message" }, /* [RFC7313] */
1466: { 7, 1, "Invalid ROUTE-REFRESH message length" } /* [RFC7313] */
1467: };
1468:
1469: /**
1470: * bgp_error_dsc - return BGP error description
1471: * @code: BGP error code
1472: * @subcode: BGP error subcode
1473: *
1474: * bgp_error_dsc() returns error description for BGP errors
1475: * which might be static string or given temporary buffer.
1476: */
1477: const char *
1478: bgp_error_dsc(unsigned code, unsigned subcode)
1479: {
1480: static char buff[32];
1481: unsigned i;
1482: for (i=0; i < ARRAY_SIZE(bgp_msg_table); i++)
1483: if (bgp_msg_table[i].major == code && bgp_msg_table[i].minor == subcode)
1484: {
1485: return bgp_msg_table[i].msg;
1486: }
1487:
1488: bsprintf(buff, "Unknown error %d.%d", code, subcode);
1489: return buff;
1490: }
1491:
1492: void
1493: bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
1494: {
1495: const byte *name;
1496: byte *t, argbuf[36];
1497: unsigned i;
1498:
1499: /* Don't report Cease messages generated by myself */
1500: if (code == 6 && class == BE_BGP_TX)
1501: return;
1502:
1503: name = bgp_error_dsc(code, subcode);
1504: t = argbuf;
1505: if (len)
1506: {
1507: *t++ = ':';
1508: *t++ = ' ';
1509:
1510: if ((code == 2) && (subcode == 2) && ((len == 2) || (len == 4)))
1511: {
1512: /* Bad peer AS - we would like to print the AS */
1513: t += bsprintf(t, "%d", (len == 2) ? get_u16(data) : get_u32(data));
1514: goto done;
1515: }
1516: if (len > 16)
1517: len = 16;
1518: for (i=0; i<len; i++)
1519: t += bsprintf(t, "%02x", data[i]);
1520: }
1521: done:
1522: *t = 0;
1523: log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
1524: }
1525:
1526: static void
1527: bgp_rx_notification(struct bgp_conn *conn, byte *pkt, uint len)
1528: {
1529: struct bgp_proto *p = conn->bgp;
1530: if (len < 21)
1531: {
1532: bgp_error(conn, 1, 2, pkt+16, 2);
1533: return;
1534: }
1535:
1536: unsigned code = pkt[19];
1537: unsigned subcode = pkt[20];
1538: int err = (code != 6);
1539:
1540: bgp_log_error(p, BE_BGP_RX, "Received", code, subcode, pkt+21, len-21);
1541: bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);
1542:
1543: #ifndef IPV6
1544: if ((code == 2) && ((subcode == 4) || (subcode == 7))
1545: /* Error related to capability:
1546: * 4 - Peer does not support capabilities at all.
1547: * 7 - Peer request some capability. Strange unless it is IPv6 only peer.
1548: */
1549: && (p->cf->capabilities == 2)
1550: /* Capabilities are not explicitly enabled or disabled, therefore heuristic is used */
1551: && (conn->start_state == BSS_CONNECT)
1552: /* Failed connection attempt have used capabilities */
1553: && (p->cf->remote_as <= 0xFFFF))
1554: /* Not possible with disabled capabilities */
1555: {
1556: /* We try connect without capabilities */
1557: log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
1558: p->start_state = BSS_CONNECT_NOCAP;
1559: err = 0;
1560: }
1561: #endif
1562:
1563: bgp_conn_enter_close_state(conn);
1564: bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
1565:
1566: if (err)
1567: {
1568: bgp_update_startup_delay(p);
1569: bgp_stop(p, 0);
1570: }
1571: }
1572:
1573: static void
1574: bgp_rx_keepalive(struct bgp_conn *conn)
1575: {
1576: struct bgp_proto *p = conn->bgp;
1577:
1578: BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1579: bgp_start_timer(conn->hold_timer, conn->hold_time);
1580: switch (conn->state)
1581: {
1582: case BS_OPENCONFIRM:
1583: bgp_conn_enter_established_state(conn);
1584: break;
1585: case BS_ESTABLISHED:
1586: break;
1587: default:
1588: bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0);
1589: }
1590: }
1591:
1592: static void
1593: bgp_rx_route_refresh(struct bgp_conn *conn, byte *pkt, uint len)
1594: {
1595: struct bgp_proto *p = conn->bgp;
1596:
1597: if (conn->state != BS_ESTABLISHED)
1598: { bgp_error(conn, 5, fsm_err_subcode[conn->state], NULL, 0); return; }
1599:
1600: if (!p->cf->enable_refresh)
1601: { bgp_error(conn, 1, 3, pkt+18, 1); return; }
1602:
1603: if (len < (BGP_HEADER_LENGTH + 4))
1604: { bgp_error(conn, 1, 2, pkt+16, 2); return; }
1605:
1606: if (len > (BGP_HEADER_LENGTH + 4))
1607: { bgp_error(conn, 7, 1, pkt, MIN(len, 2048)); return; }
1608:
1609: /* FIXME - we ignore AFI/SAFI values, as we support
1610: just one value and even an error code for an invalid
1611: request is not defined */
1612:
1613: /* RFC 7313 redefined reserved field as RR message subtype */
1614: uint subtype = conn->peer_enhanced_refresh_support ? pkt[21] : BGP_RR_REQUEST;
1615:
1616: switch (subtype)
1617: {
1618: case BGP_RR_REQUEST:
1619: BGP_TRACE(D_PACKETS, "Got ROUTE-REFRESH");
1620: proto_request_feeding(&p->p);
1621: break;
1622:
1623: case BGP_RR_BEGIN:
1624: BGP_TRACE(D_PACKETS, "Got BEGIN-OF-RR");
1625: bgp_refresh_begin(p);
1626: break;
1627:
1628: case BGP_RR_END:
1629: BGP_TRACE(D_PACKETS, "Got END-OF-RR");
1630: bgp_refresh_end(p);
1631: break;
1632:
1633: default:
1634: log(L_WARN "%s: Got ROUTE-REFRESH message with unknown subtype %u, ignoring",
1635: p->p.name, subtype);
1636: break;
1637: }
1638: }
1639:
1640:
1641: /**
1642: * bgp_rx_packet - handle a received packet
1643: * @conn: BGP connection
1644: * @pkt: start of the packet
1645: * @len: packet size
1646: *
1647: * bgp_rx_packet() takes a newly received packet and calls the corresponding
1648: * packet handler according to the packet type.
1649: */
1650: static void
1651: bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
1652: {
1653: byte type = pkt[18];
1654:
1655: DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
1656:
1657: if (conn->bgp->p.mrtdump & MD_MESSAGES)
1658: mrt_dump_bgp_packet(conn, pkt, len);
1659:
1660: switch (type)
1661: {
1662: case PKT_OPEN: return bgp_rx_open(conn, pkt, len);
1663: case PKT_UPDATE: return bgp_rx_update(conn, pkt, len);
1664: case PKT_NOTIFICATION: return bgp_rx_notification(conn, pkt, len);
1665: case PKT_KEEPALIVE: return bgp_rx_keepalive(conn);
1666: case PKT_ROUTE_REFRESH: return bgp_rx_route_refresh(conn, pkt, len);
1667: default: bgp_error(conn, 1, 3, pkt+18, 1);
1668: }
1669: }
1670:
1671: /**
1672: * bgp_rx - handle received data
1673: * @sk: socket
1674: * @size: amount of data received
1675: *
1676: * bgp_rx() is called by the socket layer whenever new data arrive from
1677: * the underlying TCP connection. It assembles the data fragments to packets,
1678: * checks their headers and framing and passes complete packets to
1679: * bgp_rx_packet().
1680: */
1681: int
1682: bgp_rx(sock *sk, uint size)
1683: {
1684: struct bgp_conn *conn = sk->data;
1685: struct bgp_proto *p = conn->bgp;
1686: byte *pkt_start = sk->rbuf;
1687: byte *end = pkt_start + size;
1688: unsigned i, len;
1689:
1690: DBG("BGP: RX hook: Got %d bytes\n", size);
1691: while (end >= pkt_start + BGP_HEADER_LENGTH)
1692: {
1693: if ((conn->state == BS_CLOSE) || (conn->sk != sk))
1694: return 0;
1695: for(i=0; i<16; i++)
1696: if (pkt_start[i] != 0xff)
1697: {
1698: bgp_error(conn, 1, 1, NULL, 0);
1699: break;
1700: }
1701: len = get_u16(pkt_start+16);
1702: if (len < BGP_HEADER_LENGTH || len > bgp_max_packet_length(p))
1703: {
1704: bgp_error(conn, 1, 2, pkt_start+16, 2);
1705: break;
1706: }
1707: if (end < pkt_start + len)
1708: break;
1709: bgp_rx_packet(conn, pkt_start, len);
1710: pkt_start += len;
1711: }
1712: if (pkt_start != sk->rbuf)
1713: {
1714: memmove(sk->rbuf, pkt_start, end - pkt_start);
1715: sk->rpos = sk->rbuf + (end - pkt_start);
1716: }
1717: return 0;
1718: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>